GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/customizations/cloudtrail/validation.py
# Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import base64
import binascii
import json
import hashlib
import logging
import re
import sys
import zlib
from zlib import error as ZLibError
from datetime import timedelta
from dateutil import tz, parser

from pyasn1.error import PyAsn1Error
import rsa

from awscli.customizations.cloudtrail.utils import get_trail_by_arn, \
    get_account_id_from_arn
from awscli.customizations.commands import BasicCommand
from botocore.exceptions import ClientError
from awscli.compat import get_current_datetime
from awscli.schema import ParameterRequiredError
from awscli.utils import create_nested_client

LOG = logging.getLogger(__name__)
DATE_FORMAT = '%Y%m%dT%H%M%SZ'
DISPLAY_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def format_date(date):
    """Returns a formatted date string in a CloudTrail date format"""
    return date.strftime(DATE_FORMAT)


def format_display_date(date):
    """Returns a formatted date string meant for CLI output"""
    return date.strftime(DISPLAY_DATE_FORMAT)


def normalize_date(date):
    """Returns a normalized date using a UTC timezone"""
    return date.replace(tzinfo=tz.tzutc())


def extract_digest_key_date(digest_s3_key):
    """Extract the timestamp portion of a manifest file.

    Manifest file names take the following form:
    AWSLogs/{account}/CloudTrail-Digest/{region}/{ymd}/{account}_CloudTrail \
    -Digest_{region}_{name}_{region}_{date}.json.gz
    """
    return digest_s3_key[-24:-8]


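# Illustrative sketch (editor's addition, not part of the aws-cli module):
# the trailing '.json.gz' suffix is 8 characters and the DATE_FORMAT
# timestamp in front of it is 16, so a [-24:-8] slice isolates the date.
def _example_extract_digest_key_date():
    key = ('AWSLogs/123456789012/CloudTrail-Digest/us-east-1/2015/08/05/'
           '123456789012_CloudTrail-Digest_us-east-1_foo_us-east-1_'
           '20150805T235943Z.json.gz')
    assert extract_digest_key_date(key) == '20150805T235943Z'

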
def parse_date(date_string):
    try:
        return parser.parse(date_string)
    except ValueError:
        raise ValueError('Unable to parse date value: %s' % date_string)


def assert_cloudtrail_arn_is_valid(trail_arn):
    """Ensures that the ARN looks correct.

    ARNs look like: arn:aws:cloudtrail:us-east-1:123456789012:trail/foo"""
    pattern = re.compile(r'arn:.+:cloudtrail:.+:\d{12}:trail/.+')
    if not pattern.match(trail_arn):
        raise ValueError('Invalid trail ARN provided: %s' % trail_arn)


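# Illustrative sketch (editor's addition): the pattern above requires a
# 12-digit account id and a 'trail/' resource, so a well-formed trail ARN
# passes while anything else raises ValueError.
def _example_assert_arn():
    assert_cloudtrail_arn_is_valid(
        'arn:aws:cloudtrail:us-east-1:123456789012:trail/foo')  # accepted
    # assert_cloudtrail_arn_is_valid('arn:aws:iam::123456789012:role/foo')
    # would raise: ValueError('Invalid trail ARN provided: ...')

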
def create_digest_traverser(
    cloudtrail_client,
    organization_client,
    s3_client_provider,
    trail_arn,
    trail_source_region=None,
    on_invalid=None,
    on_gap=None,
    on_missing=None,
    bucket=None,
    prefix=None,
    account_id=None,
):
    """Creates a CloudTrail DigestTraverser and its object graph.

    :type cloudtrail_client: botocore.client.CloudTrail
    :param cloudtrail_client: Client used to connect to CloudTrail
    :type organization_client: botocore.client.organizations
    :param organization_client: Client used to connect to Organizations
    :type s3_client_provider: S3ClientProvider
    :param s3_client_provider: Used to create an Amazon S3 client per region.
    :param trail_arn: CloudTrail trail ARN
    :param trail_source_region: The scanned region of a trail.
    :param on_invalid: Callback that is invoked when validating a digest fails.
    :param on_gap: Callback that is invoked when a digest has no link to the
        previous digest, but there are more digests to validate. This can
        happen when a trail is disabled for a period of time.
    :param on_missing: Callback that is invoked when a digest file has been
        deleted from Amazon S3 but is supposed to be present.
    :param bucket: Amazon S3 bucket of the trail if it is different than the
        bucket that is currently associated with the trail.
    :param prefix: Key prefix prepended to each digest and log placed
        in the Amazon S3 bucket if it is different than the prefix that is
        currently associated with the trail.
    :param account_id: The account id for which the digest files are
        validated. For normal trails this is the caller account, for
        organization trails it is the member account.

    ``on_gap``, ``on_invalid``, and ``on_missing`` callbacks are invoked with
    the following named arguments:

    - ``bucket``: The next S3 bucket.
    - ``next_key``: (optional) Next digest key that was found in the bucket.
    - ``next_end_date``: (optional) End date of the next found digest.
    - ``last_key``: The last digest key that was found.
    - ``last_start_date``: (optional) Start date of last found digest.
    - ``message``: (optional) Message string about the notification.
    """
    assert_cloudtrail_arn_is_valid(trail_arn)
    organization_id = None
    if bucket is None:
        # Determine the bucket and prefix based on the trail arn.
        trail_info = get_trail_by_arn(cloudtrail_client, trail_arn)
        LOG.debug('Loaded trail info: %s', trail_info)
        bucket = trail_info['S3BucketName']
        prefix = trail_info.get('S3KeyPrefix', None)
        is_org_trail = trail_info.get('IsOrganizationTrail')
        if is_org_trail:
            if not account_id:
                raise ParameterRequiredError(
                    "Missing required parameter for organization "
                    "trail: '--account-id'")
            organization_id = organization_client.describe_organization()[
                'Organization']['Id']

    # Determine the region from the ARN (e.g., arn:aws:cloudtrail:REGION:...)
    trail_region = trail_arn.split(':')[3]
    # Determine the name from the ARN (the last part after "/")
    trail_name = trail_arn.split('/')[-1]
    # If the account id is not specified, parse it from the trail ARN.
    if not account_id:
        account_id = get_account_id_from_arn(trail_arn)

    digest_provider = DigestProvider(
        account_id=account_id, trail_name=trail_name,
        s3_client_provider=s3_client_provider,
        trail_source_region=trail_source_region,
        trail_home_region=trail_region,
        organization_id=organization_id)
    return DigestTraverser(
        digest_provider=digest_provider, starting_bucket=bucket,
        starting_prefix=prefix, on_invalid=on_invalid, on_gap=on_gap,
        on_missing=on_missing,
        public_key_provider=PublicKeyProvider(cloudtrail_client))


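# Illustrative sketch (editor's addition): a callback handed to
# create_digest_traverser receives the named arguments documented above.
# This hypothetical on_gap handler just logs the hole in the digest chain.
def _example_on_gap(bucket, next_key, next_end_date, last_key,
                    last_start_date, message):
    LOG.warning('Digest chain gap in s3://%s between %s and %s',
                bucket, next_end_date, last_start_date)

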
class S3ClientProvider(object):
    """Creates Amazon S3 clients and determines the region name of a client.

    This class will cache the location constraints of previously requested
    buckets and cache previously created clients for the same region.
    """
    def __init__(self, session, get_bucket_location_region='us-east-1'):
        self._session = session
        self._get_bucket_location_region = get_bucket_location_region
        self._client_cache = {}
        self._region_cache = {}

    def get_client(self, bucket_name):
        """Creates an S3 client that can work with the given bucket name"""
        region_name = self._get_bucket_region(bucket_name)
        return self._create_client(region_name)

    def _get_bucket_region(self, bucket_name):
        """Returns the region of a bucket"""
        if bucket_name not in self._region_cache:
            client = self._create_client(self._get_bucket_location_region)
            result = client.get_bucket_location(Bucket=bucket_name)
            region = result['LocationConstraint'] or 'us-east-1'
            self._region_cache[bucket_name] = region
        return self._region_cache[bucket_name]

    def _create_client(self, region_name):
        """Creates an Amazon S3 client for the given region name"""
        if region_name not in self._client_cache:
            client = create_nested_client(
                self._session, 's3', region_name=region_name)
            self._client_cache[region_name] = client
        return self._client_cache[region_name]


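# Illustrative sketch (editor's addition; the bucket name is hypothetical):
# the two caches mean one GetBucketLocation call per bucket and one client
# per region, so repeated lookups return the very same client object.
def _example_client_reuse(session):
    provider = S3ClientProvider(session)
    first = provider.get_client('my-trail-bucket')
    second = provider.get_client('my-trail-bucket')
    assert first is second

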
class DigestError(ValueError):
    """Exception raised when a digest fails to validate"""
    pass


class DigestSignatureError(DigestError):
    """Exception raised when a digest signature is invalid"""
    def __init__(self, bucket, key):
        message = ('Digest file\ts3://%s/%s\tINVALID: signature verification '
                   'failed') % (bucket, key)
        super(DigestSignatureError, self).__init__(message)


class InvalidDigestFormat(DigestError):
    """Exception raised when a digest has an invalid format"""
    def __init__(self, bucket, key):
        message = 'Digest file\ts3://%s/%s\tINVALID: invalid format' % (
            bucket, key)
        super(InvalidDigestFormat, self).__init__(message)


class PublicKeyProvider(object):
    """Retrieves public keys from CloudTrail within a date range."""
    def __init__(self, cloudtrail_client):
        self._cloudtrail_client = cloudtrail_client

    def get_public_keys(self, start_date, end_date):
        """Loads public keys in a date range into a returned dict.

        :type start_date: datetime
        :param start_date: Start date of a date range.
        :type end_date: datetime
        :param end_date: End date of a date range.
        :rtype: dict
        :return: Returns a dict where each key is the fingerprint of the
            public key, and each value is a dict of public key data.
        """
        public_keys = self._cloudtrail_client.list_public_keys(
            StartTime=start_date, EndTime=end_date)
        public_keys_in_range = public_keys['PublicKeyList']
        LOG.debug('Loaded public keys in range: %s', public_keys_in_range)
        return dict((key['Fingerprint'], key) for key in public_keys_in_range)


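# Illustrative sketch (editor's addition): the returned mapping is keyed by
# fingerprint, and each entry carries at least the base64 'Value' that
# Sha256RSADigestValidator below decodes into a DER-encoded RSA public key.
def _example_public_key_lookup(cloudtrail_client, start_date, end_date):
    keys = PublicKeyProvider(cloudtrail_client).get_public_keys(
        start_date, end_date)
    for fingerprint, key in keys.items():
        LOG.debug('fingerprint %s -> %d base64 chars',
                  fingerprint, len(key['Value']))

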
class DigestProvider(object):
    """
    Retrieves digest keys and digests from Amazon S3.

    This class is responsible for determining the full list of digest files
    in a bucket and loading digests from the bucket into a JSON decoded
    dict. This class is not responsible for validation or iterating from
    one digest to the next.
    """

    def __init__(
        self,
        s3_client_provider,
        account_id,
        trail_name,
        trail_home_region,
        trail_source_region=None,
        organization_id=None,
    ):
        self._client_provider = s3_client_provider
        self.trail_name = trail_name
        self.account_id = account_id
        self.trail_home_region = trail_home_region
        self.trail_source_region = trail_source_region or trail_home_region
        self.organization_id = organization_id

    def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date):
        """Returns a list of digest keys in the date range.

        This method uses a list_objects API call and provides a Marker
        parameter that is calculated based on the start_date provided.
        Amazon S3 then returns all keys in the bucket that start after
        the given key (non-inclusive). We then iterate over the keys
        until the date extracted from the yielded keys is greater than
        the given end_date.
        """
        digests = []
        marker = self._create_digest_key(start_date, prefix)
        s3_digest_files_prefix = self._create_digest_prefix(start_date, prefix)
        client = self._client_provider.get_client(bucket)
        paginator = client.get_paginator('list_objects')
        page_iterator = paginator.paginate(
            Bucket=bucket, Marker=marker, Prefix=s3_digest_files_prefix)
        key_filter = page_iterator.search('Contents[*].Key')
        # Create target start and end dates.
        target_start_date = format_date(normalize_date(start_date))
        # Add one hour to the end_date to get logs that spilled over to the
        # next hour.
        target_end_date = format_date(
            normalize_date(end_date + timedelta(hours=1)))
        # Ensure digests are from the same trail.
        digest_key_regex = re.compile(self._create_digest_key_regex(prefix))
        for key in key_filter:
            if key and digest_key_regex.match(key):
                # Use a lexicographic comparison to know when to stop.
                extracted_date = extract_digest_key_date(key)
                if extracted_date > target_end_date:
                    break
                # Only append digests after the start date.
                if extracted_date >= target_start_date:
                    digests.append(key)
        return digests

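    @staticmethod
    def _example_marker_scan(sorted_keys, marker, target_start, target_end):
        # Editor's sketch (not part of the original module): emulates the
        # list_objects Marker semantics relied on above. S3 yields keys
        # lexicographically greater than the Marker, and the scan stops once
        # the timestamp embedded in a key passes the end of the range.
        for key in sorted_keys:
            if key <= marker:
                continue
            stamp = extract_digest_key_date(key)
            if stamp > target_end:
                break
            if stamp >= target_start:
                yield key
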
    def fetch_digest(self, bucket, key):
        """Loads a digest by key from S3.

        Returns the JSON-decoded data and the gzip-inflated raw content.
        """
        client = self._client_provider.get_client(bucket)
        result = client.get_object(Bucket=bucket, Key=key)
        try:
            digest = zlib.decompress(result['Body'].read(),
                                     zlib.MAX_WBITS | 16)
            digest_data = json.loads(digest.decode())
        except (ValueError, ZLibError):
            # Cannot gzip decode or JSON parse.
            raise InvalidDigestFormat(bucket, key)
        # Add the expected digest signature and algorithm to the dict.
        if 'signature' not in result['Metadata'] \
                or 'signature-algorithm' not in result['Metadata']:
            raise DigestSignatureError(bucket, key)
        digest_data['_signature'] = result['Metadata']['signature']
        digest_data['_signature_algorithm'] = \
            result['Metadata']['signature-algorithm']
        return digest_data, digest

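    @staticmethod
    def _example_inflate_digest(raw_gzip_bytes):
        # Editor's sketch (not part of the original module): the
        # zlib.MAX_WBITS | 16 flag tells zlib to expect a gzip header and
        # trailer, which is how fetch_digest above inflates the digest body
        # in a single call before JSON-decoding it.
        return json.loads(
            zlib.decompress(raw_gzip_bytes, zlib.MAX_WBITS | 16).decode())
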
    def _create_digest_key(self, start_date, key_prefix):
        """Computes an Amazon S3 key based on the provided data.

        The computed key is what would have been placed in the S3 bucket if
        a log digest were created at a specific time. This computed key
        does not have to actually exist, as it will only be used as a
        Marker parameter in a list_objects call.

        :return: Returns a computed key as a string.
        """
        # Subtract one minute to ensure the dates are inclusive.
        date = start_date - timedelta(minutes=1)
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'date': format_date(date),
            'ymd': date.strftime('%Y/%m/%d'),
            'source_region': self.trail_source_region,
            'home_region': self.trail_home_region,
            'name': self.trail_name
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail-Digest/{source_region}/'
            '{ymd}/{account_id}_CloudTrail-Digest_{source_region}_{name}_'
            '{home_region}_{date}.json.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = key_prefix + '/' + key
        return key

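    @staticmethod
    def _example_digest_key():
        # Editor's sketch (not part of the original module): for a
        # hypothetical trail "foo" in account 123456789012, the marker key
        # lands one minute before the start date, i.e. it ends in
        # .../2015/08/04/123456789012_CloudTrail-Digest_us-east-1_foo_
        # us-east-1_20150804T235900Z.json.gz.
        provider = DigestProvider(
            s3_client_provider=None,  # unused by _create_digest_key
            account_id='123456789012', trail_name='foo',
            trail_home_region='us-east-1')
        return provider._create_digest_key(
            parse_date('2015-08-05T00:00:00Z'), key_prefix=None)
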
    def _create_digest_prefix(self, start_date, key_prefix):
        """Creates an S3 prefix to scope listing to the trail's region.

        :return: Returns a prefix string to limit S3 listing scope.
        """
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'source_region': self.trail_source_region
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += '{account_id}/CloudTrail-Digest/{source_region}'
        prefix = template.format(**template_params)
        if key_prefix:
            prefix = key_prefix + '/' + prefix
        return prefix

    def _create_digest_key_regex(self, key_prefix):
        """Creates a regular expression used to match against S3 keys"""
        template = 'AWSLogs/'
        template_params = {
            'account_id': re.escape(self.account_id),
            'source_region': re.escape(self.trail_source_region),
            'home_region': re.escape(self.trail_home_region),
            'name': re.escape(self.trail_name)
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail\\-Digest/{source_region}/'
            '\\d+/\\d+/\\d+/{account_id}_CloudTrail\\-Digest_'
            '{source_region}_{name}_{home_region}_.+\\.json\\.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = re.escape(key_prefix) + '/' + key
        return '^' + key + '$'


class DigestTraverser(object):
    """Retrieves and validates digests within a date range."""
    # These keys are required to be present before validating the contents
    # of a digest.
    required_digest_keys = ['digestPublicKeyFingerprint', 'digestS3Bucket',
                            'digestS3Object', 'previousDigestSignature',
                            'digestEndTime', 'digestStartTime']

    def __init__(self, digest_provider, starting_bucket, starting_prefix,
                 public_key_provider, digest_validator=None,
                 on_invalid=None, on_gap=None, on_missing=None):
        """
        :type digest_provider: DigestProvider
        :param digest_provider: DigestProvider object
        :param starting_bucket: S3 bucket where the digests are stored.
        :param starting_prefix: An optional prefix applied to each S3 key.
        :param public_key_provider: Provides public keys for a range.
        :param digest_validator: Validates digests using a validate method.
        :param on_invalid: Callback invoked when a digest is invalid.
        :param on_gap: Callback invoked when a digest has no parent, but
            there are still more digests to validate.
        :param on_missing: Callback invoked when a digest file is missing.
        """
        self.starting_bucket = starting_bucket
        self.starting_prefix = starting_prefix
        self.digest_provider = digest_provider
        self._public_key_provider = public_key_provider
        self._on_gap = on_gap
        self._on_invalid = on_invalid
        self._on_missing = on_missing
        if digest_validator is None:
            digest_validator = Sha256RSADigestValidator()
        self._digest_validator = digest_validator

    def traverse(self, start_date, end_date=None):
        """Creates and returns a generator that yields validated digest data.

        Each yielded digest dictionary contains information about the digest
        and the log file associated with the digest. Digest files are
        validated before they are yielded. Whether or not the digest is
        successfully validated is stated in the "isValid" key value pair of
        the yielded dictionary.

        :type start_date: datetime
        :param start_date: Date to start validating from (inclusive).
        :type end_date: datetime
        :param end_date: Date to stop validating at (inclusive).
        """
        if end_date is None:
            end_date = get_current_datetime()
        end_date = normalize_date(end_date)
        start_date = normalize_date(start_date)
        bucket = self.starting_bucket
        prefix = self.starting_prefix
        digests = self._load_digests(bucket, prefix, start_date, end_date)
        public_keys = self._load_public_keys(start_date, end_date)
        key, end_date = self._get_last_digest(digests)
        last_start_date = end_date
        while key and start_date <= last_start_date:
            try:
                digest, end_date = self._load_and_validate_digest(
                    public_keys, bucket, key)
                last_start_date = normalize_date(
                    parse_date(digest['digestStartTime']))
                previous_bucket = digest.get('previousDigestS3Bucket', None)
                yield digest
                if previous_bucket is None:
                    # The chain is broken, so find the next digest in the
                    # digest store.
                    key, end_date = self._find_next_digest(
                        digests=digests, bucket=bucket, last_key=key,
                        last_start_date=last_start_date, cb=self._on_gap,
                        is_cb_conditional=True)
                else:
                    key = digest['previousDigestS3Object']
                    if previous_bucket != bucket:
                        bucket = previous_bucket
                        # The bucket changed so reload the digest list.
                        digests = self._load_digests(
                            bucket, prefix, start_date, end_date)
            except ClientError as e:
                if e.response['Error']['Code'] != 'NoSuchKey':
                    raise e
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_missing,
                    message=str(e))
            except DigestError as e:
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message=str(e))
            except Exception as e:
                # Any other unexpected errors.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message='Digest file\ts3://%s/%s\tINVALID: %s'
                            % (bucket, key, str(e)))

    def _load_digests(self, bucket, prefix, start_date, end_date):
        return self.digest_provider.load_digest_keys_in_range(
            bucket=bucket, prefix=prefix,
            start_date=start_date, end_date=end_date)

    def _find_next_digest(self, digests, bucket, last_key, last_start_date,
                          cb=None, is_cb_conditional=False, message=None):
        """Finds the next digest in the bucket and invokes any callback."""
        next_key, next_end_date = self._get_last_digest(digests, last_key)
        if cb and (not is_cb_conditional or next_key):
            cb(bucket=bucket, next_key=next_key, last_key=last_key,
               next_end_date=next_end_date, last_start_date=last_start_date,
               message=message)
        return next_key, next_end_date

    def _get_last_digest(self, digests, before_key=None):
        """Finds the previous digest key (either the last or the one before
        before_key).

        If no key is provided, the last digest is used. If a digest is found,
        the end date of the provider is adjusted to match the found key's end
        date.
        """
        if not digests:
            return None, None
        elif before_key is None:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            return next_key, next_key_date
        # Find a key before the given key.
        before_key_date = parse_date(extract_digest_key_date(before_key))
        while digests:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            if next_key_date < before_key_date:
                LOG.debug("Next found key: %s", next_key)
                return next_key, next_key_date
        return None, None

    def _load_and_validate_digest(self, public_keys, bucket, key):
        """Loads and validates a digest from S3.

        :param public_keys: Public key dictionary of fingerprint to dict.
        :return: Returns a tuple of the digest data as a dict and end_date
        :rtype: tuple
        """
        digest_data, digest = self.digest_provider.fetch_digest(bucket, key)
        for required_key in self.required_digest_keys:
            if required_key not in digest_data:
                raise InvalidDigestFormat(bucket, key)
        # Ensure the bucket and key are the same as what's expected.
        if digest_data['digestS3Bucket'] != bucket \
                or digest_data['digestS3Object'] != key:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: has been moved from its '
                 'original location') % (bucket, key))
        # Get the public keys in the given time range.
        fingerprint = digest_data['digestPublicKeyFingerprint']
        if fingerprint not in public_keys:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: public key not found in '
                 'region %s for fingerprint %s') %
                (bucket, key, self.digest_provider.trail_home_region,
                 fingerprint))
        public_key_hex = public_keys[fingerprint]['Value']
        self._digest_validator.validate(
            bucket, key, public_key_hex, digest_data, digest)
        end_date = normalize_date(parse_date(digest_data['digestEndTime']))
        return digest_data, end_date

    def _load_public_keys(self, start_date, end_date):
        public_keys = self._public_key_provider.get_public_keys(
            start_date, end_date)
        if not public_keys:
            raise RuntimeError(
                'No public keys found between %s and %s' %
                (format_display_date(start_date),
                 format_display_date(end_date)))
        return public_keys


class Sha256RSADigestValidator(object):
    """
    Validates SHA256withRSA signed digests.

    The result of validating the digest is inserted into the digest_data
    dictionary using the isValid key value pair.
    """

    def validate(self, bucket, key, public_key, digest_data, inflated_digest):
        """Validates a digest file.

        Throws a DigestError when the digest is invalid.

        :param bucket: Bucket of the digest file
        :param key: Key of the digest file
        :param public_key: Public key bytes.
        :param digest_data: Dict of digest data returned when JSON
            decoding a manifest.
        :param inflated_digest: Inflated digest file contents as bytes.
        """
        try:
            decoded_key = base64.b64decode(public_key)
            public_key = rsa.PublicKey.load_pkcs1(decoded_key, format='DER')
            to_sign = self._create_string_to_sign(digest_data, inflated_digest)
            signature_bytes = binascii.unhexlify(digest_data['_signature'])
            rsa.verify(to_sign, signature_bytes, public_key)
        except PyAsn1Error:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: Unable to load PKCS #1 key'
                 ' with fingerprint %s')
                % (bucket, key, digest_data['digestPublicKeyFingerprint']))
        except rsa.pkcs1.VerificationError:
            # Note from the Python-RSA docs: Never display the stack trace of
            # a rsa.pkcs1.VerificationError exception. It shows where in the
            # code the exception occurred, and thus leaks information about
            # the key.
            raise DigestSignatureError(bucket, key)

    def _create_string_to_sign(self, digest_data, inflated_digest):
        previous_signature = digest_data['previousDigestSignature']
        if previous_signature is None:
            # The value must be 'null' to match the Java implementation.
            previous_signature = 'null'
        string_to_sign = "%s\n%s/%s\n%s\n%s" % (
            digest_data['digestEndTime'],
            digest_data['digestS3Bucket'],
            digest_data['digestS3Object'],
            hashlib.sha256(inflated_digest).hexdigest(),
            previous_signature)
        LOG.debug('Digest string to sign: %s', string_to_sign)
        return string_to_sign.encode()


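# Illustrative sketch (editor's addition, not part of the aws-cli module):
# a sign/verify round trip with the python-rsa API used above. rsa.verify()
# returns the hash method name on success and raises
# rsa.pkcs1.VerificationError when the payload or signature was altered.
def _example_signature_round_trip():
    pub, priv = rsa.newkeys(512)  # demo-sized key, not a CloudTrail key
    to_sign = b'2015-08-05T23:59:43Z\nbucket/key\nsha256hex\nnull'
    signature = rsa.sign(to_sign, priv, 'SHA-256')
    rsa.verify(to_sign, signature, pub)  # returns 'SHA-256' on success

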
class CloudTrailValidateLogs(BasicCommand):
    """
    Validates log digests and log files, optionally saving them to disk.
    """
    NAME = 'validate-logs'
    DESCRIPTION = """
    Validates CloudTrail logs for a given period of time.

    This command uses the digest files delivered to your S3 bucket to perform
    the validation.

    The AWS CLI allows you to detect the following types of changes:

    - Modification or deletion of CloudTrail log files.
    - Modification or deletion of CloudTrail digest files.

    To validate log files with the AWS CLI, the following preconditions must
    be met:

    - You must have online connectivity to AWS.
    - You must have read access to the S3 bucket that contains the digest and
      log files.
    - The digest and log files must not have been moved from the original S3
      location where CloudTrail delivered them.
    - For organization trails, you must have access to describe-organization
      to validate digest files.

    When you disable Log File Validation, the chain of digest files is broken
    after one hour. CloudTrail will not digest log files that were delivered
    during a period in which the Log File Validation feature was disabled.
    For example, if you enable Log File Validation on January 1, disable it
    on January 2, and re-enable it on January 10, digest files will not be
    created for the log files delivered from January 3 to January 9. The same
    applies whenever you stop CloudTrail logging or delete a trail.

    .. note::

        Log files that have been downloaded to local disk cannot be validated
        with the AWS CLI. The CLI will download all log files each time this
        command is executed.

    .. note::

        This command requires that the role executing the command has
        permission to call ListObjects, GetObject, and GetBucketLocation for
        each bucket referenced by the trail.

    """

    ARG_TABLE = [
        {'name': 'trail-arn', 'required': True, 'cli_type_name': 'string',
         'help_text': 'Specifies the ARN of the trail to be validated.'},
        {'name': 'start-time', 'required': True, 'cli_type_name': 'string',
         'help_text': ('Specifies that log files delivered on or after the '
                       'specified UTC timestamp value will be validated. '
                       'Example: "2015-01-08T05:21:42Z".')},
        {'name': 'end-time', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies that log files delivered on or '
                       'before the specified UTC timestamp value will be '
                       'validated. The default value is the current time. '
                       'Example: "2015-01-08T12:31:41Z".')},
        {'name': 's3-bucket', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 bucket where the digest '
                       'files are stored. If a bucket name is not specified, '
                       'the CLI will retrieve it by calling '
                       'describe_trails.')},
        {'name': 's3-prefix', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 prefix where the digest '
                       'files are stored. If not specified, the CLI will '
                       'determine the prefix automatically by calling '
                       'describe_trails.')},
        {'name': 'account-id', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the account for validating '
                       'logs. This parameter is needed for organization '
                       'trails for validating logs for a specific account '
                       'inside an organization.')},
        {'name': 'verbose', 'cli_type_name': 'boolean',
         'action': 'store_true',
         'help_text': 'Display verbose log validation information.'}
    ]

    def __init__(self, session):
        super(CloudTrailValidateLogs, self).__init__(session)
        self.trail_arn = None
        self.is_verbose = False
        self.start_time = None
        self.end_time = None
        self.s3_bucket = None
        self.s3_prefix = None
        self.s3_client_provider = None
        self.cloudtrail_client = None
        self.account_id = None
        self._source_region = None
        self._valid_digests = 0
        self._invalid_digests = 0
        self._valid_logs = 0
        self._invalid_logs = 0
        self._is_last_status_double_space = True
        self._found_start_time = None
        self._found_end_time = None

    def _run_main(self, args, parsed_globals):
        self.handle_args(args)
        self.setup_services(parsed_globals)
        self._call()
        if self._invalid_digests > 0 or self._invalid_logs > 0:
            return 1
        return 0

    def handle_args(self, args):
        self.trail_arn = args.trail_arn
        self.is_verbose = args.verbose
        self.s3_bucket = args.s3_bucket
        self.s3_prefix = args.s3_prefix
        self.account_id = args.account_id
        self.start_time = normalize_date(parse_date(args.start_time))
        if args.end_time:
            self.end_time = normalize_date(parse_date(args.end_time))
        else:
            self.end_time = normalize_date(get_current_datetime())
        if self.start_time > self.end_time:
            raise ValueError(('Invalid time range specified: start-time must '
                              'occur before end-time'))
        # The found start time always defaults to the given start time. This
        # value may change if the earliest found digest is after the given
        # start time. Note that the summary output report of what date ranges
        # were actually found is only shown if a valid digest is encountered,
        # thereby setting self._found_end_time to a value.
        self._found_start_time = self.start_time

    def setup_services(self, parsed_globals):
        self._source_region = parsed_globals.region
        # Use the same region as the CLI to get bucket locations.
        self.s3_client_provider = S3ClientProvider(
            self._session, self._source_region)
        client_args = {'region_name': parsed_globals.region,
                       'verify': parsed_globals.verify_ssl}
        self.organization_client = create_nested_client(
            self._session, 'organizations', **client_args)

        if parsed_globals.endpoint_url is not None:
            client_args['endpoint_url'] = parsed_globals.endpoint_url
        self.cloudtrail_client = create_nested_client(
            self._session, 'cloudtrail', **client_args)

    def _call(self):
        traverser = create_digest_traverser(
            trail_arn=self.trail_arn, cloudtrail_client=self.cloudtrail_client,
            organization_client=self.organization_client,
            trail_source_region=self._source_region,
            s3_client_provider=self.s3_client_provider, bucket=self.s3_bucket,
            prefix=self.s3_prefix, on_missing=self._on_missing_digest,
            on_invalid=self._on_invalid_digest, on_gap=self._on_digest_gap,
            account_id=self.account_id)
        self._write_startup_text()
        digests = traverser.traverse(self.start_time, self.end_time)
        for digest in digests:
            # Only valid digests are yielded and only valid digests can adjust
            # the found times that are reported in the CLI output summary.
            self._track_found_times(digest)
            self._valid_digests += 1
            self._write_status(
                'Digest file\ts3://%s/%s\tvalid'
                % (digest['digestS3Bucket'], digest['digestS3Object']))
            if not digest['logFiles']:
                continue
            for log in digest['logFiles']:
                self._download_log(log)
        self._write_summary_text()

    def _track_found_times(self, digest):
        # Track the earliest found start time, but do not use a date before
        # the user supplied start date.
        digest_start_time = parse_date(digest['digestStartTime'])
        if digest_start_time > self.start_time:
            self._found_start_time = digest_start_time
        # Only use the last found end time if it is less than the
        # user supplied end time (or the current date).
        if not self._found_end_time:
            digest_end_time = parse_date(digest['digestEndTime'])
            self._found_end_time = min(digest_end_time, self.end_time)

    def _download_log(self, log):
        """Downloads a log, decompresses it, and compares SHA256 checksums."""
        try:
            # Create a client that can work with this bucket.
            client = self.s3_client_provider.get_client(log['s3Bucket'])
            response = client.get_object(
                Bucket=log['s3Bucket'], Key=log['s3Object'])
            gzip_inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            rolling_hash = hashlib.sha256()
            for chunk in iter(lambda: response['Body'].read(2048), b""):
                data = gzip_inflater.decompress(chunk)
                rolling_hash.update(data)
            remaining_data = gzip_inflater.flush()
            if remaining_data:
                rolling_hash.update(remaining_data)
            computed_hash = rolling_hash.hexdigest()
            if computed_hash != log['hashValue']:
                self._on_log_invalid(log)
            else:
                self._valid_logs += 1
                self._write_status(('Log file\ts3://%s/%s\tvalid'
                                    % (log['s3Bucket'], log['s3Object'])))
        except ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                raise
            self._on_missing_log(log)
        except Exception:
            self._on_invalid_log_format(log)

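    @staticmethod
    def _example_streaming_hash(body, expected_hash):
        # Editor's sketch (not part of the original module): mirrors
        # _download_log above by inflating 2 KB chunks as they arrive and
        # hashing the decompressed bytes without buffering the whole log.
        inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
        rolling_hash = hashlib.sha256()
        for chunk in iter(lambda: body.read(2048), b""):
            rolling_hash.update(inflater.decompress(chunk))
        rolling_hash.update(inflater.flush())
        return rolling_hash.hexdigest() == expected_hash
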
    def _write_status(self, message, is_error=False):
        if is_error:
            if self._is_last_status_double_space:
                sys.stderr.write("%s\n\n" % message)
            else:
                sys.stderr.write("\n%s\n\n" % message)
            self._is_last_status_double_space = True
        elif self.is_verbose:
            self._is_last_status_double_space = False
            sys.stdout.write("%s\n" % message)

    def _write_startup_text(self):
        sys.stdout.write(
            'Validating log files for trail %s between %s and %s\n\n'
            % (self.trail_arn, format_display_date(self.start_time),
               format_display_date(self.end_time)))

    def _write_summary_text(self):
        if not self._is_last_status_double_space:
            sys.stdout.write('\n')
        sys.stdout.write('Results requested for %s to %s\n'
                         % (format_display_date(self.start_time),
                            format_display_date(self.end_time)))
        if not self._valid_digests and not self._invalid_digests:
            sys.stdout.write('No digests found\n')
            return
        if not self._found_start_time or not self._found_end_time:
            sys.stdout.write('No valid digests found in range\n')
        else:
            sys.stdout.write('Results found for %s to %s:\n'
                             % (format_display_date(self._found_start_time),
                                format_display_date(self._found_end_time)))
        self._write_ratio(self._valid_digests, self._invalid_digests, 'digest')
        self._write_ratio(self._valid_logs, self._invalid_logs, 'log')
        sys.stdout.write('\n')

    def _write_ratio(self, valid, invalid, name):
        total = valid + invalid
        if total > 0:
            sys.stdout.write('\n%d/%d %s files valid' % (valid, total, name))
            if invalid > 0:
                sys.stdout.write(', %d/%d %s files INVALID' % (invalid, total,
                                                               name))

    def _on_missing_digest(self, bucket, last_key, **kwargs):
        self._invalid_digests += 1
        self._write_status('Digest file\ts3://%s/%s\tINVALID: not found'
                           % (bucket, last_key), True)

    def _on_digest_gap(self, **kwargs):
        self._write_status(
            'No log files were delivered by CloudTrail between %s and %s'
            % (format_display_date(kwargs['next_end_date']),
               format_display_date(kwargs['last_start_date'])), True)

    def _on_invalid_digest(self, message, **kwargs):
        self._invalid_digests += 1
        self._write_status(message, True)

    def _on_invalid_log_format(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            ('Log file\ts3://%s/%s\tINVALID: invalid format'
             % (log_data['s3Bucket'], log_data['s3Object'])), True)

    def _on_log_invalid(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            "Log file\ts3://%s/%s\tINVALID: hash value doesn't match"
            % (log_data['s3Bucket'], log_data['s3Object']), True)

    def _on_missing_log(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            'Log file\ts3://%s/%s\tINVALID: not found'
            % (log_data['s3Bucket'], log_data['s3Object']), True)