GitHub Repository: aws/aws-cli
Path: blob/develop/awscli/customizations/cloudtrail/validation.py
# Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import base64
import binascii
import json
import hashlib
import logging
import re
import sys
import zlib
from zlib import error as ZLibError
from datetime import timedelta
from dateutil import tz, parser

from pyasn1.error import PyAsn1Error
import rsa

from awscli.customizations.cloudtrail.utils import get_trail_by_arn, \
    get_account_id_from_arn
from awscli.customizations.commands import BasicCommand
from botocore.exceptions import ClientError
from awscli.compat import get_current_datetime
from awscli.schema import ParameterRequiredError
from awscli.utils import create_nested_client

LOG = logging.getLogger(__name__)
DATE_FORMAT = '%Y%m%dT%H%M%SZ'
DISPLAY_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def format_date(date):
    """Returns a formatted date string in a CloudTrail date format"""
    return date.strftime(DATE_FORMAT)


def format_display_date(date):
    """Returns a formatted date string meant for CLI output"""
    return date.strftime(DISPLAY_DATE_FORMAT)


def normalize_date(date):
    """Returns a normalized date using a UTC timezone"""
    return date.replace(tzinfo=tz.tzutc())


def extract_digest_key_date(digest_s3_key):
    """Extract the timestamp portion of a manifest file.

    Manifest file names take the following form:
    AWSLogs/{account}/CloudTrail-Digest/{region}/{ymd}/{account}_CloudTrail \
    -Digest_{region}_{name}_{home_region}_{date}.json.gz
    """
    return digest_s3_key[-24:-8]
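
# Illustrative slice (hypothetical key): in
# '..._CloudTrail-Digest_us-east-1_foo_us-east-1_20150817T043557Z.json.gz'
# the trailing '.json.gz' is 8 characters and the timestamp is 16, so
# digest_s3_key[-24:-8] yields '20150817T043557Z'.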


def parse_date(date_string):
    try:
        return parser.parse(date_string)
    except ValueError:
        raise ValueError('Unable to parse date value: %s' % date_string)


def assert_cloudtrail_arn_is_valid(trail_arn):
    """Ensures that the arn looks correct.

    ARNs look like: arn:aws:cloudtrail:us-east-1:123456789012:trail/foo"""
    pattern = re.compile(r'arn:.+:cloudtrail:.+:\d{12}:trail/.+')
    if not pattern.match(trail_arn):
        raise ValueError('Invalid trail ARN provided: %s' % trail_arn)
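
# For example, the docstring's ARN passes:
#   assert_cloudtrail_arn_is_valid(
#       'arn:aws:cloudtrail:us-east-1:123456789012:trail/foo')
# while a value without a 12-digit account id, such as
# 'arn:aws:cloudtrail:trail/foo', raises ValueError.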


def create_digest_traverser(
    cloudtrail_client,
    organization_client,
    s3_client_provider,
    trail_arn,
    trail_source_region=None,
    on_invalid=None,
    on_gap=None,
    on_missing=None,
    bucket=None,
    prefix=None,
    account_id=None,
):
    """Creates a CloudTrail DigestTraverser and its object graph.

    :type cloudtrail_client: botocore.client.CloudTrail
    :param cloudtrail_client: Client used to connect to CloudTrail
    :type organization_client: botocore.client.organizations
    :param organization_client: Client used to connect to Organizations
    :type s3_client_provider: S3ClientProvider
    :param s3_client_provider: Used to create an Amazon S3 client per region.
    :param trail_arn: CloudTrail trail ARN
    :param trail_source_region: The scanned region of a trail.
    :param on_invalid: Callback that is invoked when validating a digest fails.
    :param on_gap: Callback that is invoked when a digest has no link to the
        previous digest, but there are more digests to validate. This can
        happen when a trail is disabled for a period of time.
    :param on_missing: Callback that is invoked when a digest file has been
        deleted from Amazon S3 but is supposed to be present.
    :param bucket: Amazon S3 bucket of the trail if it is different than the
        bucket that is currently associated with the trail.
    :param prefix: Key prefix prepended to each digest and log placed
        in the Amazon S3 bucket if it is different than the prefix that is
        currently associated with the trail.
    :param account_id: The account id for which the digest files are
        validated. For normal trails this is the caller account, for
        organization trails it is the member account.

    ``on_gap``, ``on_invalid``, and ``on_missing`` callbacks are invoked with
    the following named arguments:

    - ``bucket``: The next S3 bucket.
    - ``next_key``: (optional) Next digest key that was found in the bucket.
    - ``next_end_date``: (optional) End date of the next found digest.
    - ``last_key``: The last digest key that was found.
    - ``last_start_date``: (optional) Start date of last found digest.
    - ``message``: (optional) Message string about the notification.
    """
    assert_cloudtrail_arn_is_valid(trail_arn)
    organization_id = None
    if bucket is None:
        # Determine the bucket and prefix based on the trail arn.
        trail_info = get_trail_by_arn(cloudtrail_client, trail_arn)
        LOG.debug('Loaded trail info: %s', trail_info)
        bucket = trail_info['S3BucketName']
        prefix = trail_info.get('S3KeyPrefix', None)
        is_org_trail = trail_info.get('IsOrganizationTrail')
        if is_org_trail:
            if not account_id:
                raise ParameterRequiredError(
                    "Missing required parameter for organization "
                    "trail: '--account-id'")
            organization_id = organization_client.describe_organization()[
                'Organization']['Id']

    # Determine the region from the ARN (e.g., arn:aws:cloudtrail:REGION:...)
    trail_region = trail_arn.split(':')[3]
    # Determine the name from the ARN (the last part after "/")
    trail_name = trail_arn.split('/')[-1]
    # If the account id is not specified, parse it from the trail ARN.
    if not account_id:
        account_id = get_account_id_from_arn(trail_arn)

    digest_provider = DigestProvider(
        account_id=account_id, trail_name=trail_name,
        s3_client_provider=s3_client_provider,
        trail_source_region=trail_source_region,
        trail_home_region=trail_region,
        organization_id=organization_id)
    return DigestTraverser(
        digest_provider=digest_provider, starting_bucket=bucket,
        starting_prefix=prefix, on_invalid=on_invalid, on_gap=on_gap,
        on_missing=on_missing,
        public_key_provider=PublicKeyProvider(cloudtrail_client))
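
# Illustrative wiring (hypothetical session and trail; this mirrors what
# CloudTrailValidateLogs.setup_services/_call do below):
#   session = botocore.session.get_session()
#   traverser = create_digest_traverser(
#       cloudtrail_client=create_nested_client(session, 'cloudtrail'),
#       organization_client=create_nested_client(session, 'organizations'),
#       s3_client_provider=S3ClientProvider(session),
#       trail_arn='arn:aws:cloudtrail:us-east-1:123456789012:trail/foo')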


class S3ClientProvider(object):
    """Creates Amazon S3 clients and determines the region name of a client.

    This class will cache the location constraints of previously requested
    buckets and cache previously created clients for the same region.
    """
    def __init__(self, session, get_bucket_location_region='us-east-1'):
        self._session = session
        self._get_bucket_location_region = get_bucket_location_region
        self._client_cache = {}
        self._region_cache = {}

    def get_client(self, bucket_name):
        """Creates an S3 client that can work with the given bucket name"""
        region_name = self._get_bucket_region(bucket_name)
        return self._create_client(region_name)

    def _get_bucket_region(self, bucket_name):
        """Returns the region of a bucket"""
        if bucket_name not in self._region_cache:
            client = self._create_client(self._get_bucket_location_region)
            result = client.get_bucket_location(Bucket=bucket_name)
            region = result['LocationConstraint'] or 'us-east-1'
            self._region_cache[bucket_name] = region
        return self._region_cache[bucket_name]

    def _create_client(self, region_name):
        """Creates an Amazon S3 client for the given region name"""
        if region_name not in self._client_cache:
            client = create_nested_client(
                self._session, 's3', region_name=region_name)
            self._client_cache[region_name] = client
        return self._client_cache[region_name]
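
# Illustrative caching behavior (hypothetical session and buckets): two
# buckets that resolve to the same region share one cached client, and the
# bucket-to-region lookup itself is cached per bucket:
#   provider = S3ClientProvider(session)
#   a = provider.get_client('logs-bucket-in-us-west-2')
#   b = provider.get_client('other-bucket-in-us-west-2')
#   assert a is b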


class DigestError(ValueError):
    """Exception raised when a digest fails to validate"""
    pass


class DigestSignatureError(DigestError):
    """Exception raised when a digest signature is invalid"""
    def __init__(self, bucket, key):
        message = ('Digest file\ts3://%s/%s\tINVALID: signature verification '
                   'failed') % (bucket, key)
        super(DigestSignatureError, self).__init__(message)


class InvalidDigestFormat(DigestError):
    """Exception raised when a digest has an invalid format"""
    def __init__(self, bucket, key):
        message = 'Digest file\ts3://%s/%s\tINVALID: invalid format' % (
            bucket, key)
        super(InvalidDigestFormat, self).__init__(message)


class PublicKeyProvider(object):
    """Retrieves public keys from CloudTrail within a date range."""
    def __init__(self, cloudtrail_client):
        self._cloudtrail_client = cloudtrail_client

    def get_public_keys(self, start_date, end_date):
        """Loads public keys in a date range into a returned dict.

        :type start_date: datetime
        :param start_date: Start date of a date range.
        :type end_date: datetime
        :param end_date: End date of a date range.
        :rtype: dict
        :return: Returns a dict where each key is the fingerprint of the
            public key, and each value is a dict of public key data.
        """
        public_keys = self._cloudtrail_client.list_public_keys(
            StartTime=start_date, EndTime=end_date)
        public_keys_in_range = public_keys['PublicKeyList']
        LOG.debug('Loaded public keys in range: %s', public_keys_in_range)
        return dict((key['Fingerprint'], key) for key in public_keys_in_range)
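
# The returned mapping is keyed by fingerprint, roughly (hypothetical values):
#   {'a1b2c3d4': {'Fingerprint': 'a1b2c3d4', 'Value': '<base64 DER bytes>',
#                 'ValidityStartTime': ..., 'ValidityEndTime': ...}}
# Sha256RSADigestValidator below consumes the 'Value' entry.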


class DigestProvider(object):
    """
    Retrieves digest keys and digests from Amazon S3.

    This class is responsible for determining the full list of digest files
    in a bucket and loading digests from the bucket into a JSON decoded
    dict. This class is not responsible for validation or iterating from
    one digest to the next.
    """

    def __init__(
        self,
        s3_client_provider,
        account_id,
        trail_name,
        trail_home_region,
        trail_source_region=None,
        organization_id=None,
    ):
        self._client_provider = s3_client_provider
        self.trail_name = trail_name
        self.account_id = account_id
        self.trail_home_region = trail_home_region
        self.trail_source_region = trail_source_region or trail_home_region
        self.organization_id = organization_id

    def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date):
        """Returns a list of digest keys in the date range.

        This method uses a list_objects API call and provides a Marker
        parameter that is calculated based on the start_date provided.
        Amazon S3 then returns all keys in the bucket that start after
        the given key (non-inclusive). We then iterate over the keys
        until the date extracted from the yielded keys is greater than
        the given end_date.
        """
        digests = []
        marker = self._create_digest_key(start_date, prefix)
        client = self._client_provider.get_client(bucket)
        paginator = client.get_paginator('list_objects')
        page_iterator = paginator.paginate(Bucket=bucket, Marker=marker)
        key_filter = page_iterator.search('Contents[*].Key')
        # Create a target start and end date.
        target_start_date = format_date(normalize_date(start_date))
        # Add one hour to the end_date to get logs that spilled over to next.
        target_end_date = format_date(
            normalize_date(end_date + timedelta(hours=1)))
        # Ensure digests are from the same trail.
        digest_key_regex = re.compile(self._create_digest_key_regex(prefix))
        for key in key_filter:
            if digest_key_regex.match(key):
                # Use a lexicographic comparison to know when to stop.
                extracted_date = extract_digest_key_date(key)
                if extracted_date > target_end_date:
                    break
                # Only append digests after the start date.
                if extracted_date >= target_start_date:
                    digests.append(key)
        return digests
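
    # The lexicographic comparisons above are sound because DATE_FORMAT
    # ('%Y%m%dT%H%M%SZ') sorts strings in the same order as the dates they
    # encode, e.g. '20150817T043557Z' < '20150818T000000Z'.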

    def fetch_digest(self, bucket, key):
        """Loads a digest by key from S3.

        Returns the JSON-decoded data and the GZIP-inflated raw content.
        """
        client = self._client_provider.get_client(bucket)
        result = client.get_object(Bucket=bucket, Key=key)
        try:
            digest = zlib.decompress(result['Body'].read(),
                                     zlib.MAX_WBITS | 16)
            digest_data = json.loads(digest.decode())
        except (ValueError, ZLibError):
            # Cannot gzip decode or JSON parse.
            raise InvalidDigestFormat(bucket, key)
        # Add the expected digest signature and algorithm to the dict.
        if 'signature' not in result['Metadata'] \
                or 'signature-algorithm' not in result['Metadata']:
            raise DigestSignatureError(bucket, key)
        digest_data['_signature'] = result['Metadata']['signature']
        digest_data['_signature_algorithm'] = \
            result['Metadata']['signature-algorithm']
        return digest_data, digest
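
    # zlib.MAX_WBITS | 16 tells zlib to expect a gzip wrapper. An equivalent
    # round trip with the standard library (hypothetical payload):
    #   raw = gzip.compress(json.dumps({'digestEndTime': '...'}).encode())
    #   json.loads(zlib.decompress(raw, zlib.MAX_WBITS | 16).decode())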

    def _create_digest_key(self, start_date, key_prefix):
        """Computes an Amazon S3 key based on the provided data.

        The computed key is what would have been placed in the S3 bucket if
        a log digest were created at a specific time. This computed key
        does not have to actually exist as it will only be used as
        a Marker parameter in a list_objects call.

        :return: Returns a computed key as a string.
        """
        # Subtract one minute to ensure the dates are inclusive.
        date = start_date - timedelta(minutes=1)
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'date': format_date(date),
            'ymd': date.strftime('%Y/%m/%d'),
            'source_region': self.trail_source_region,
            'home_region': self.trail_home_region,
            'name': self.trail_name
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail-Digest/{source_region}/'
            '{ymd}/{account_id}_CloudTrail-Digest_{source_region}_{name}_'
            '{home_region}_{date}.json.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = key_prefix + '/' + key
        return key
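
    # For example (hypothetical trail 'foo', account 123456789012, home and
    # source region us-east-1, start date 2015-08-17T04:36Z), the computed
    # marker would be the single key:
    #   AWSLogs/123456789012/CloudTrail-Digest/us-east-1/2015/08/17/
    #       123456789012_CloudTrail-Digest_us-east-1_foo_us-east-1_20150817T043500Z.json.gz
    # (wrapped here for width).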

    def _create_digest_key_regex(self, key_prefix):
        """Creates a regular expression used to match against S3 keys"""
        template = 'AWSLogs/'
        template_params = {
            'account_id': re.escape(self.account_id),
            'source_region': re.escape(self.trail_source_region),
            'home_region': re.escape(self.trail_home_region),
            'name': re.escape(self.trail_name)
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail\\-Digest/{source_region}/'
            '\\d+/\\d+/\\d+/{account_id}_CloudTrail\\-Digest_'
            '{source_region}_{name}_{home_region}_.+\\.json\\.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = re.escape(key_prefix) + '/' + key
        return '^' + key + '$'


class DigestTraverser(object):
    """Retrieves and validates digests within a date range."""
    # These keys are required to be present before validating the contents
    # of a digest.
    required_digest_keys = ['digestPublicKeyFingerprint', 'digestS3Bucket',
                            'digestS3Object', 'previousDigestSignature',
                            'digestEndTime', 'digestStartTime']

    def __init__(self, digest_provider, starting_bucket, starting_prefix,
                 public_key_provider, digest_validator=None,
                 on_invalid=None, on_gap=None, on_missing=None):
        """
        :type digest_provider: DigestProvider
        :param digest_provider: DigestProvider object
        :param starting_bucket: S3 bucket where the digests are stored.
        :param starting_prefix: An optional prefix applied to each S3 key.
        :param public_key_provider: Provides public keys for a range.
        :param digest_validator: Validates digests using a validate method.
        :param on_invalid: Callback invoked when a digest is invalid.
        :param on_gap: Callback invoked when a digest has no parent, but
            there are still more digests to validate.
        :param on_missing: Callback invoked when a digest file is missing.
        """
        self.starting_bucket = starting_bucket
        self.starting_prefix = starting_prefix
        self.digest_provider = digest_provider
        self._public_key_provider = public_key_provider
        self._on_gap = on_gap
        self._on_invalid = on_invalid
        self._on_missing = on_missing
        if digest_validator is None:
            digest_validator = Sha256RSADigestValidator()
        self._digest_validator = digest_validator

    def traverse(self, start_date, end_date=None):
        """Creates and returns a generator that yields validated digest data.

        Each yielded digest dictionary contains information about the digest
        and the log file associated with the digest. Digest files are
        validated before they are yielded. Whether or not the digest is
        successfully validated is stated in the "isValid" key value pair of
        the yielded dictionary.

        :type start_date: datetime
        :param start_date: Date to start validating from (inclusive).
        :type end_date: datetime
        :param end_date: Date to stop validating at (inclusive).
        """
        if end_date is None:
            end_date = get_current_datetime()
        end_date = normalize_date(end_date)
        start_date = normalize_date(start_date)
        bucket = self.starting_bucket
        prefix = self.starting_prefix
        digests = self._load_digests(bucket, prefix, start_date, end_date)
        public_keys = self._load_public_keys(start_date, end_date)
        key, end_date = self._get_last_digest(digests)
        last_start_date = end_date
        while key and start_date <= last_start_date:
            try:
                digest, end_date = self._load_and_validate_digest(
                    public_keys, bucket, key)
                last_start_date = normalize_date(
                    parse_date(digest['digestStartTime']))
                previous_bucket = digest.get('previousDigestS3Bucket', None)
                yield digest
                if previous_bucket is None:
                    # The chain is broken, so find next in digest store.
                    key, end_date = self._find_next_digest(
                        digests=digests, bucket=bucket, last_key=key,
                        last_start_date=last_start_date, cb=self._on_gap,
                        is_cb_conditional=True)
                else:
                    key = digest['previousDigestS3Object']
                    if previous_bucket != bucket:
                        bucket = previous_bucket
                        # The bucket changed so reload the digest list.
                        digests = self._load_digests(
                            bucket, prefix, start_date, end_date)
            except ClientError as e:
                if e.response['Error']['Code'] != 'NoSuchKey':
                    raise e
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_missing,
                    message=str(e))
            except DigestError as e:
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message=str(e))
            except Exception as e:
                # Any other unexpected errors.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message='Digest file\ts3://%s/%s\tINVALID: %s'
                            % (bucket, key, str(e)))
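
    # Illustrative consumption (hypothetical dates; _call below drives the
    # generator the same way):
    #   for digest in traverser.traverse(parse_date('2015-01-08T05:21:42Z')):
    #       print(digest['digestS3Object'])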

    def _load_digests(self, bucket, prefix, start_date, end_date):
        return self.digest_provider.load_digest_keys_in_range(
            bucket=bucket, prefix=prefix,
            start_date=start_date, end_date=end_date)

    def _find_next_digest(self, digests, bucket, last_key, last_start_date,
                          cb=None, is_cb_conditional=False, message=None):
        """Finds the next digest in the bucket and invokes any callback."""
        next_key, next_end_date = self._get_last_digest(digests, last_key)
        if cb and (not is_cb_conditional or next_key):
            cb(bucket=bucket, next_key=next_key, last_key=last_key,
               next_end_date=next_end_date, last_start_date=last_start_date,
               message=message)
        return next_key, next_end_date

    def _get_last_digest(self, digests, before_key=None):
        """Finds the previous digest key (the last, or the one before
        before_key).

        If no key is provided, the last digest is used. If a digest is found,
        the end date of the provider is adjusted to match the found key's end
        date.
        """
        if not digests:
            return None, None
        elif before_key is None:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            return next_key, next_key_date
        # Find a key before the given key.
        before_key_date = parse_date(extract_digest_key_date(before_key))
        while digests:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            if next_key_date < before_key_date:
                LOG.debug("Next found key: %s", next_key)
                return next_key, next_key_date
        return None, None

    def _load_and_validate_digest(self, public_keys, bucket, key):
        """Loads and validates a digest from S3.

        :param public_keys: Public key dictionary of fingerprint to dict.
        :return: Returns a tuple of the digest data as a dict and end_date
        :rtype: tuple
        """
        digest_data, digest = self.digest_provider.fetch_digest(bucket, key)
        for required_key in self.required_digest_keys:
            if required_key not in digest_data:
                raise InvalidDigestFormat(bucket, key)
        # Ensure the bucket and key are the same as what's expected.
        if digest_data['digestS3Bucket'] != bucket \
                or digest_data['digestS3Object'] != key:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: has been moved from its '
                 'original location') % (bucket, key))
        # Get the public keys in the given time range.
        fingerprint = digest_data['digestPublicKeyFingerprint']
        if fingerprint not in public_keys:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: public key not found in '
                 'region %s for fingerprint %s') %
                (bucket, key, self.digest_provider.trail_home_region,
                 fingerprint))
        public_key_hex = public_keys[fingerprint]['Value']
        self._digest_validator.validate(
            bucket, key, public_key_hex, digest_data, digest)
        end_date = normalize_date(parse_date(digest_data['digestEndTime']))
        return digest_data, end_date

    def _load_public_keys(self, start_date, end_date):
        public_keys = self._public_key_provider.get_public_keys(
            start_date, end_date)
        if not public_keys:
            raise RuntimeError(
                'No public keys found between %s and %s' %
                (format_display_date(start_date),
                 format_display_date(end_date)))
        return public_keys


class Sha256RSADigestValidator(object):
    """
    Validates SHA256withRSA signed digests.

    The result of validating the digest is inserted into the digest_data
    dictionary using the isValid key value pair.
    """

    def validate(self, bucket, key, public_key, digest_data, inflated_digest):
        """Validates a digest file.

        Throws a DigestError when the digest is invalid.

        :param bucket: Bucket of the digest file
        :param key: Key of the digest file
        :param public_key: Public key bytes.
        :param digest_data: Dict of digest data returned when JSON
            decoding a manifest.
        :param inflated_digest: Inflated digest file contents as bytes.
        """
        try:
            decoded_key = base64.b64decode(public_key)
            public_key = rsa.PublicKey.load_pkcs1(decoded_key, format='DER')
            to_sign = self._create_string_to_sign(digest_data, inflated_digest)
            signature_bytes = binascii.unhexlify(digest_data['_signature'])
            rsa.verify(to_sign, signature_bytes, public_key)
        except PyAsn1Error:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: Unable to load PKCS #1 key'
                 ' with fingerprint %s')
                % (bucket, key, digest_data['digestPublicKeyFingerprint']))
        except rsa.pkcs1.VerificationError:
            # Note from the Python-RSA docs: Never display the stack trace of
            # a rsa.pkcs1.VerificationError exception. It shows where in the
            # code the exception occurred, and thus leaks information about
            # the key.
            raise DigestSignatureError(bucket, key)

    def _create_string_to_sign(self, digest_data, inflated_digest):
        previous_signature = digest_data['previousDigestSignature']
        if previous_signature is None:
            # The value must be 'null' to match the Java implementation.
            previous_signature = 'null'
        string_to_sign = "%s\n%s/%s\n%s\n%s" % (
            digest_data['digestEndTime'],
            digest_data['digestS3Bucket'],
            digest_data['digestS3Object'],
            hashlib.sha256(inflated_digest).hexdigest(),
            previous_signature)
        LOG.debug('Digest string to sign: %s', string_to_sign)
        return string_to_sign.encode()
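
    # The signed string thus has four newline-separated parts (hypothetical
    # values):
    #   2015-08-17T10:35:57Z                         <- digestEndTime
    #   my-bucket/AWSLogs/..._20150817T103557Z.json.gz
    #   <hex SHA-256 of the inflated digest file>
    #   <previous digest signature, or the literal string 'null'>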


class CloudTrailValidateLogs(BasicCommand):
    """
    Validates log digests and log files, optionally saving them to disk.
    """
    NAME = 'validate-logs'
    DESCRIPTION = """
    Validates CloudTrail logs for a given period of time.

    This command uses the digest files delivered to your S3 bucket to perform
    the validation.

    The AWS CLI allows you to detect the following types of changes:

    - Modification or deletion of CloudTrail log files.
    - Modification or deletion of CloudTrail digest files.

    To validate log files with the AWS CLI, the following preconditions must
    be met:

    - You must have online connectivity to AWS.
    - You must have read access to the S3 bucket that contains the digest and
      log files.
    - The digest and log files must not have been moved from the original S3
      location where CloudTrail delivered them.
    - For organization trails you must have access to describe-organization
      to validate digest files.

    When you disable Log File Validation, the chain of digest files is broken
    after one hour. CloudTrail will not digest log files that were delivered
    during a period in which the Log File Validation feature was disabled.
    For example, if you enable Log File Validation on January 1, disable it
    on January 2, and re-enable it on January 10, digest files will not be
    created for the log files delivered from January 3 to January 9. The same
    applies whenever you stop CloudTrail logging or delete a trail.

    .. note::

        Log files that have been downloaded to local disk cannot be validated
        with the AWS CLI. The CLI will download all log files each time this
        command is executed.

    .. note::

        This command requires that the role executing the command has
        permission to call ListObjects, GetObject, and GetBucketLocation for
        each bucket referenced by the trail.

    """

    ARG_TABLE = [
        {'name': 'trail-arn', 'required': True, 'cli_type_name': 'string',
         'help_text': 'Specifies the ARN of the trail to be validated'},
        {'name': 'start-time', 'required': True, 'cli_type_name': 'string',
         'help_text': ('Specifies that log files delivered on or after the '
                       'specified UTC timestamp value will be validated. '
                       'Example: "2015-01-08T05:21:42Z".')},
        {'name': 'end-time', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies that log files delivered on or '
                       'before the specified UTC timestamp value will be '
                       'validated. The default value is the current time. '
                       'Example: "2015-01-08T12:31:41Z".')},
        {'name': 's3-bucket', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 bucket where the digest '
                       'files are stored. If a bucket name is not specified, '
                       'the CLI will retrieve it by calling '
                       'describe_trails.')},
        {'name': 's3-prefix', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 prefix where the '
                       'digest files are stored. If not specified, the CLI '
                       'will determine the prefix automatically by calling '
                       'describe_trails.')},
        {'name': 'account-id', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the account for validating '
                       'logs. This parameter is needed for organization '
                       'trails for validating logs for a specific account '
                       'inside an organization.')},
        {'name': 'verbose', 'cli_type_name': 'boolean',
         'action': 'store_true',
         'help_text': 'Display verbose log validation information'}
    ]
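
    # Illustrative invocation (hypothetical ARN):
    #   aws cloudtrail validate-logs \
    #       --trail-arn arn:aws:cloudtrail:us-east-1:123456789012:trail/foo \
    #       --start-time 2015-01-08T05:21:42Z --verbose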

    def __init__(self, session):
        super(CloudTrailValidateLogs, self).__init__(session)
        self.trail_arn = None
        self.is_verbose = False
        self.start_time = None
        self.end_time = None
        self.s3_bucket = None
        self.s3_prefix = None
        self.s3_client_provider = None
        self.cloudtrail_client = None
        self.account_id = None
        self._source_region = None
        self._valid_digests = 0
        self._invalid_digests = 0
        self._valid_logs = 0
        self._invalid_logs = 0
        self._is_last_status_double_space = True
        self._found_start_time = None
        self._found_end_time = None

    def _run_main(self, args, parsed_globals):
        self.handle_args(args)
        self.setup_services(parsed_globals)
        self._call()
        if self._invalid_digests > 0 or self._invalid_logs > 0:
            return 1
        return 0

    def handle_args(self, args):
        self.trail_arn = args.trail_arn
        self.is_verbose = args.verbose
        self.s3_bucket = args.s3_bucket
        self.s3_prefix = args.s3_prefix
        self.account_id = args.account_id
        self.start_time = normalize_date(parse_date(args.start_time))
        if args.end_time:
            self.end_time = normalize_date(parse_date(args.end_time))
        else:
            self.end_time = normalize_date(get_current_datetime())
        if self.start_time > self.end_time:
            raise ValueError(('Invalid time range specified: start-time must '
                              'occur before end-time'))
        # Found start time always defaults to the given start time. This value
        # may change if the earliest found digest is after the given start
        # time. Note that the summary output report of what date ranges were
        # actually found is only shown if a valid digest is encountered,
        # thereby setting self._found_end_time to a value.
        self._found_start_time = self.start_time

    def setup_services(self, parsed_globals):
        self._source_region = parsed_globals.region
        # Use the same region as the CLI's region to get bucket locations.
        self.s3_client_provider = S3ClientProvider(
            self._session, self._source_region)
        client_args = {'region_name': parsed_globals.region,
                       'verify': parsed_globals.verify_ssl}
        self.organization_client = create_nested_client(
            self._session, 'organizations', **client_args)

        if parsed_globals.endpoint_url is not None:
            client_args['endpoint_url'] = parsed_globals.endpoint_url
        self.cloudtrail_client = create_nested_client(
            self._session, 'cloudtrail', **client_args)

    def _call(self):
        traverser = create_digest_traverser(
            trail_arn=self.trail_arn, cloudtrail_client=self.cloudtrail_client,
            organization_client=self.organization_client,
            trail_source_region=self._source_region,
            s3_client_provider=self.s3_client_provider, bucket=self.s3_bucket,
            prefix=self.s3_prefix, on_missing=self._on_missing_digest,
            on_invalid=self._on_invalid_digest, on_gap=self._on_digest_gap,
            account_id=self.account_id)
        self._write_startup_text()
        digests = traverser.traverse(self.start_time, self.end_time)
        for digest in digests:
            # Only valid digests are yielded and only valid digests can adjust
            # the found times that are reported in the CLI output summary.
            self._track_found_times(digest)
            self._valid_digests += 1
            self._write_status(
                'Digest file\ts3://%s/%s\tvalid'
                % (digest['digestS3Bucket'], digest['digestS3Object']))
            if not digest['logFiles']:
                continue
            for log in digest['logFiles']:
                self._download_log(log)
        self._write_summary_text()

    def _track_found_times(self, digest):
        # Track the earliest found start time, but do not use a date before
        # the user supplied start date.
        digest_start_time = parse_date(digest['digestStartTime'])
        if digest_start_time > self.start_time:
            self._found_start_time = digest_start_time
        # Only use the last found end time if it is less than the
        # user supplied end time (or the current date).
        if not self._found_end_time:
            digest_end_time = parse_date(digest['digestEndTime'])
            self._found_end_time = min(digest_end_time, self.end_time)

    def _download_log(self, log):
        """Download a log, decompress, and compare SHA256 checksums"""
        try:
            # Create a client that can work with this bucket.
            client = self.s3_client_provider.get_client(log['s3Bucket'])
            response = client.get_object(
                Bucket=log['s3Bucket'], Key=log['s3Object'])
            gzip_inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            rolling_hash = hashlib.sha256()
            for chunk in iter(lambda: response['Body'].read(2048), b""):
                data = gzip_inflater.decompress(chunk)
                rolling_hash.update(data)
            remaining_data = gzip_inflater.flush()
            if remaining_data:
                rolling_hash.update(remaining_data)
            computed_hash = rolling_hash.hexdigest()
            if computed_hash != log['hashValue']:
                self._on_log_invalid(log)
            else:
                self._valid_logs += 1
                self._write_status(('Log file\ts3://%s/%s\tvalid'
                                    % (log['s3Bucket'], log['s3Object'])))
        except ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                raise
            self._on_missing_log(log)
        except Exception:
            self._on_invalid_log_format(log)
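
    # Streaming through zlib.decompressobj keeps memory bounded regardless of
    # log size; the same pattern works on any file-like source (hypothetical
    # local file):
    #   sha = hashlib.sha256()
    #   inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
    #   with open('log.json.gz', 'rb') as f:
    #       for chunk in iter(lambda: f.read(2048), b""):
    #           sha.update(inflater.decompress(chunk))
    #   sha.update(inflater.flush())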

    def _write_status(self, message, is_error=False):
        if is_error:
            if self._is_last_status_double_space:
                sys.stderr.write("%s\n\n" % message)
            else:
                sys.stderr.write("\n%s\n\n" % message)
            self._is_last_status_double_space = True
        elif self.is_verbose:
            self._is_last_status_double_space = False
            sys.stdout.write("%s\n" % message)

    def _write_startup_text(self):
        sys.stdout.write(
            'Validating log files for trail %s between %s and %s\n\n'
            % (self.trail_arn, format_display_date(self.start_time),
               format_display_date(self.end_time)))

    def _write_summary_text(self):
        if not self._is_last_status_double_space:
            sys.stdout.write('\n')
        sys.stdout.write('Results requested for %s to %s\n'
                         % (format_display_date(self.start_time),
                            format_display_date(self.end_time)))
        if not self._valid_digests and not self._invalid_digests:
            sys.stdout.write('No digests found\n')
            return
        if not self._found_start_time or not self._found_end_time:
            sys.stdout.write('No valid digests found in range\n')
        else:
            sys.stdout.write('Results found for %s to %s:\n'
                             % (format_display_date(self._found_start_time),
                                format_display_date(self._found_end_time)))
        self._write_ratio(self._valid_digests, self._invalid_digests, 'digest')
        self._write_ratio(self._valid_logs, self._invalid_logs, 'log')
        sys.stdout.write('\n')

    def _write_ratio(self, valid, invalid, name):
        total = valid + invalid
        if total > 0:
            sys.stdout.write('\n%d/%d %s files valid' % (valid, total, name))
            if invalid > 0:
                sys.stdout.write(', %d/%d %s files INVALID' % (invalid, total,
                                                               name))

    def _on_missing_digest(self, bucket, last_key, **kwargs):
        self._invalid_digests += 1
        self._write_status('Digest file\ts3://%s/%s\tINVALID: not found'
                           % (bucket, last_key), True)

    def _on_digest_gap(self, **kwargs):
        self._write_status(
            'No log files were delivered by CloudTrail between %s and %s'
            % (format_display_date(kwargs['next_end_date']),
               format_display_date(kwargs['last_start_date'])), True)

    def _on_invalid_digest(self, message, **kwargs):
        self._invalid_digests += 1
        self._write_status(message, True)

    def _on_invalid_log_format(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            ('Log file\ts3://%s/%s\tINVALID: invalid format'
             % (log_data['s3Bucket'], log_data['s3Object'])), True)

    def _on_log_invalid(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            "Log file\ts3://%s/%s\tINVALID: hash value doesn't match"
            % (log_data['s3Bucket'], log_data['s3Object']), True)

    def _on_missing_log(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            'Log file\ts3://%s/%s\tINVALID: not found'
            % (log_data['s3Bucket'], log_data['s3Object']), True)