# Source: awscli/customizations/cloudtrail/validation.py (develop branch)
# Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Implements the ``aws cloudtrail validate-logs`` command.

Downloads CloudTrail digest files from S3, walks the digest chain
backwards in time, verifies each digest's SHA256withRSA signature
against CloudTrail's published public keys, and verifies the SHA-256
hash of every log file referenced by a valid digest.
"""
import base64
import binascii
import json
import hashlib
import logging
import re
import sys
import zlib
from zlib import error as ZLibError
from datetime import timedelta
from dateutil import tz, parser

from pyasn1.error import PyAsn1Error
import rsa

from awscli.customizations.cloudtrail.utils import get_trail_by_arn, \
    get_account_id_from_arn
from awscli.customizations.commands import BasicCommand
from botocore.exceptions import ClientError
from awscli.compat import get_current_datetime
from awscli.schema import ParameterRequiredError
from awscli.utils import create_nested_client

LOG = logging.getLogger(__name__)
# Compact timestamp format used inside digest S3 key names.
DATE_FORMAT = '%Y%m%dT%H%M%SZ'
# Human-readable timestamp format used for CLI output.
DISPLAY_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def format_date(date):
    """Returns a formatted date string in a CloudTrail date format"""
    return date.strftime(DATE_FORMAT)


def format_display_date(date):
    """Returns a formatted date string meant for CLI output"""
    return date.strftime(DISPLAY_DATE_FORMAT)


def normalize_date(date):
    """Returns a normalized date using a UTC timezone"""
    return date.replace(tzinfo=tz.tzutc())


def extract_digest_key_date(digest_s3_key):
    """Extract the timestamp portion of a manifest file.

    Manifest file names take the following form:
    AWSLogs/{account}/CloudTrail-Digest/{region}/{ymd}/{account}_CloudTrail \
    -Digest_{region}_{name}_region_{date}.json.gz

    The trailing ``{date}.json.gz`` is fixed-width, so the timestamp is
    always the 16 characters ending 8 characters before the end of the key.
    """
    return digest_s3_key[-24:-8]


def parse_date(date_string):
    """Parses a date string, raising ValueError with a clearer message."""
    try:
        return parser.parse(date_string)
    except ValueError:
        raise ValueError('Unable to parse date value: %s' % date_string)


def assert_cloudtrail_arn_is_valid(trail_arn):
    """Ensures that the arn looks correct.

    ARNs look like: arn:aws:cloudtrail:us-east-1:123456789012:trail/foo"""
    pattern = re.compile(r'arn:.+:cloudtrail:.+:\d{12}:trail/.+')
    if not pattern.match(trail_arn):
        raise ValueError('Invalid trail ARN provided: %s' % trail_arn)


def create_digest_traverser(
    cloudtrail_client,
    organization_client,
    s3_client_provider,
    trail_arn,
    trail_source_region=None,
    on_invalid=None,
    on_gap=None,
    on_missing=None,
    bucket=None,
    prefix=None,
    account_id=None,
):
    """Creates a CloudTrail DigestTraverser and its object graph.

    :type cloudtrail_client: botocore.client.CloudTrail
    :param cloudtrail_client: Client used to connect to CloudTrail
    :type organization_client: botocore.client.organizations
    :param organization_client: Client used to connect to Organizations
    :type s3_client_provider: S3ClientProvider
    :param s3_client_provider: Used to create Amazon S3 client per/region.
    :param trail_arn: CloudTrail trail ARN
    :param trail_source_region: The scanned region of a trail.
    :param on_invalid: Callback that is invoked when validating a digest fails.
    :param on_gap: Callback that is invoked when a digest has no link to the
        previous digest, but there are more digests to validate. This can
        happen when a trail is disabled for a period of time.
    :param on_missing: Callback that is invoked when a digest file has been
        deleted from Amazon S3 but is supposed to be present.
    :param bucket: Amazon S3 bucket of the trail if it is different than the
        bucket that is currently associated with the trail.
    :param prefix: Key prefix prepended to each digest and log placed
        in the Amazon S3 bucket if it is different than the prefix that is
        currently associated with the trail.
    :param account_id: The account id for which the digest files are
        validated. For normal trails this is the caller account, for
        organization trails it is the member account.

    ``on_gap``, ``on_invalid``, and ``on_missing`` callbacks are invoked with
    the following named arguments:

    - ``bucket``: The next S3 bucket.
    - ``next_key``: (optional) Next digest key that was found in the bucket.
    - ``next_end_date``: (optional) End date of the next found digest.
    - ``last_key``: The last digest key that was found.
    - ``last_start_date``: (optional) Start date of last found digest.
    - ``message``: (optional) Message string about the notification.
    """
    assert_cloudtrail_arn_is_valid(trail_arn)
    organization_id = None
    if bucket is None:
        # Determine the bucket and prefix based on the trail arn.
        trail_info = get_trail_by_arn(cloudtrail_client, trail_arn)
        LOG.debug('Loaded trail info: %s', trail_info)
        bucket = trail_info['S3BucketName']
        prefix = trail_info.get('S3KeyPrefix', None)
        is_org_trail = trail_info.get('IsOrganizationTrail')
        if is_org_trail:
            # Organization trails store digests under an extra
            # {organization_id}/ path segment, so the org id is required.
            if not account_id:
                raise ParameterRequiredError(
                    "Missing required parameter for organization "
                    "trail: '--account-id'")
            organization_id = organization_client.describe_organization()[
                'Organization']['Id']

    # Determine the region from the ARN (e.g., arn:aws:cloudtrail:REGION:...)
    trail_region = trail_arn.split(':')[3]
    # Determine the name from the ARN (the last part after "/")
    trail_name = trail_arn.split('/')[-1]
    # If account id is not specified parse it from trail ARN
    if not account_id:
        account_id = get_account_id_from_arn(trail_arn)

    digest_provider = DigestProvider(
        account_id=account_id, trail_name=trail_name,
        s3_client_provider=s3_client_provider,
        trail_source_region=trail_source_region,
        trail_home_region=trail_region,
        organization_id=organization_id)
    return DigestTraverser(
        digest_provider=digest_provider, starting_bucket=bucket,
        starting_prefix=prefix, on_invalid=on_invalid, on_gap=on_gap,
        on_missing=on_missing,
        public_key_provider=PublicKeyProvider(cloudtrail_client))


class S3ClientProvider(object):
    """Creates Amazon S3 clients and determines the region name of a client.

    This class will cache the location constraints of previously requested
    buckets and cache previously created clients for the same region.
    """
    def __init__(self, session, get_bucket_location_region='us-east-1'):
        self._session = session
        # Region used only for the GetBucketLocation bootstrap call.
        self._get_bucket_location_region = get_bucket_location_region
        # region name -> S3 client
        self._client_cache = {}
        # bucket name -> region name
        self._region_cache = {}

    def get_client(self, bucket_name):
        """Creates an S3 client that can work with the given bucket name"""
        region_name = self._get_bucket_region(bucket_name)
        return self._create_client(region_name)

    def _get_bucket_region(self, bucket_name):
        """Returns the region of a bucket"""
        if bucket_name not in self._region_cache:
            client = self._create_client(self._get_bucket_location_region)
            result = client.get_bucket_location(Bucket=bucket_name)
            # GetBucketLocation returns None/'' for us-east-1 buckets.
            region = result['LocationConstraint'] or 'us-east-1'
            self._region_cache[bucket_name] = region
        return self._region_cache[bucket_name]

    def _create_client(self, region_name):
        """Creates an Amazon S3 client for the given region name"""
        if region_name not in self._client_cache:
            client = create_nested_client(self._session, 's3', region_name=region_name)
            # Remove the CLI error event that prevents exceptions.
            self._client_cache[region_name] = client
        return self._client_cache[region_name]


class DigestError(ValueError):
    """Exception raised when a digest fails to validate"""
    pass


class DigestSignatureError(DigestError):
    """Exception raised when a digest signature is invalid"""
    def __init__(self, bucket, key):
        message = ('Digest file\ts3://%s/%s\tINVALID: signature verification '
                   'failed') % (bucket, key)
        super(DigestSignatureError, self).__init__(message)


class InvalidDigestFormat(DigestError):
    """Exception raised when a digest has an invalid format"""
    def __init__(self, bucket, key):
        message = 'Digest file\ts3://%s/%s\tINVALID: invalid format' % (bucket,
                                                                        key)
        super(InvalidDigestFormat, self).__init__(message)


class PublicKeyProvider(object):
    """Retrieves public keys from CloudTrail within a date range."""
    def __init__(self, cloudtrail_client):
        self._cloudtrail_client = cloudtrail_client

    def get_public_keys(self, start_date, end_date):
        """Loads public keys in a date range into a returned dict.

        :type start_date: datetime
        :param start_date: Start date of a date range.
        :type end_date: datetime
        :param end_date: End date of a date range.
        :rtype: dict
        :return: Returns a dict where each key is the fingerprint of the
            public key, and each value is a dict of public key data.
        """
        public_keys = self._cloudtrail_client.list_public_keys(
            StartTime=start_date, EndTime=end_date)
        public_keys_in_range = public_keys['PublicKeyList']
        LOG.debug('Loaded public keys in range: %s', public_keys_in_range)
        return dict((key['Fingerprint'], key) for key in public_keys_in_range)


class DigestProvider(object):
    """
    Retrieves digest keys and digests from Amazon S3.

    This class is responsible for determining the full list of digest files
    in a bucket and loading digests from the bucket into a JSON decoded
    dict. This class is not responsible for validation or iterating from
    one digest to the next.
    """

    def __init__(
        self,
        s3_client_provider,
        account_id,
        trail_name,
        trail_home_region,
        trail_source_region=None,
        organization_id=None,
    ):
        self._client_provider = s3_client_provider
        self.trail_name = trail_name
        self.account_id = account_id
        self.trail_home_region = trail_home_region
        # A single-region trail's source region is its home region.
        self.trail_source_region = trail_source_region or trail_home_region
        self.organization_id = organization_id

    def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date):
        """Returns a list of digest keys in the date range.

        This method uses a list_objects API call and provides a Marker
        parameter that is calculated based on the start_date provided.
        Amazon S3 then returns all keys in the bucket that start after
        the given key (non-inclusive). We then iterate over the keys
        until the date extracted from the yielded keys is greater than
        the given end_date.
        """
        digests = []
        marker = self._create_digest_key(start_date, prefix)
        client = self._client_provider.get_client(bucket)
        paginator = client.get_paginator('list_objects')
        page_iterator = paginator.paginate(Bucket=bucket, Marker=marker)
        key_filter = page_iterator.search('Contents[*].Key')
        # Create a target start and end date
        target_start_date = format_date(normalize_date(start_date))
        # Add one hour to the end_date to get logs that spilled over to next.
        target_end_date = format_date(
            normalize_date(end_date + timedelta(hours=1)))
        # Ensure digests are from the same trail.
        digest_key_regex = re.compile(self._create_digest_key_regex(prefix))
        for key in key_filter:
            if digest_key_regex.match(key):
                # Use a lexicographic comparison to know when to stop.
                extracted_date = extract_digest_key_date(key)
                if extracted_date > target_end_date:
                    break
                # Only append digests after the start date.
                if extracted_date >= target_start_date:
                    digests.append(key)
        return digests

    def fetch_digest(self, bucket, key):
        """Loads a digest by key from S3.

        Returns the JSON decode data and GZIP inflated raw content.
        """
        client = self._client_provider.get_client(bucket)
        result = client.get_object(Bucket=bucket, Key=key)
        try:
            # MAX_WBITS | 16 tells zlib to expect a gzip header/trailer.
            digest = zlib.decompress(result['Body'].read(),
                                     zlib.MAX_WBITS | 16)
            digest_data = json.loads(digest.decode())
        except (ValueError, ZLibError):
            # Cannot gzip decode or JSON parse.
            raise InvalidDigestFormat(bucket, key)
        # Add the expected digest signature and algorithm to the dict.
        if 'signature' not in result['Metadata'] \
                or 'signature-algorithm' not in result['Metadata']:
            raise DigestSignatureError(bucket, key)
        digest_data['_signature'] = result['Metadata']['signature']
        digest_data['_signature_algorithm'] = \
            result['Metadata']['signature-algorithm']
        return digest_data, digest

    def _create_digest_key(self, start_date, key_prefix):
        """Computes an Amazon S3 key based on the provided data.

        The computed key is what would have been placed in the S3 bucket if
        a log digest were created at a specific time. This computed key
        does not have to actually exist as it will only be used as
        a Marker parameter in a list_objects call.

        :return: Returns a computed key as a string.
        """
        # Subtract one minute to ensure the dates are inclusive.
        date = start_date - timedelta(minutes=1)
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'date': format_date(date),
            'ymd': date.strftime('%Y/%m/%d'),
            'source_region': self.trail_source_region,
            'home_region': self.trail_home_region,
            'name': self.trail_name
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail-Digest/{source_region}/'
            '{ymd}/{account_id}_CloudTrail-Digest_{source_region}_{name}_'
            '{home_region}_{date}.json.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = key_prefix + '/' + key
        return key

    def _create_digest_key_regex(self, key_prefix):
        """Creates a regular expression used to match against S3 keys"""
        template = 'AWSLogs/'
        template_params = {
            'account_id': re.escape(self.account_id),
            'source_region': re.escape(self.trail_source_region),
            'home_region': re.escape(self.trail_home_region),
            'name': re.escape(self.trail_name)
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail\\-Digest/{source_region}/'
            '\\d+/\\d+/\\d+/{account_id}_CloudTrail\\-Digest_'
            '{source_region}_{name}_{home_region}_.+\\.json\\.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = re.escape(key_prefix) + '/' + key
        return '^' + key + '$'


class DigestTraverser(object):
    """Retrieves and validates digests within a date range."""
    # These keys are required to be present before validating the contents
    # of a digest.
    required_digest_keys = ['digestPublicKeyFingerprint', 'digestS3Bucket',
                            'digestS3Object', 'previousDigestSignature',
                            'digestEndTime', 'digestStartTime']

    def __init__(self, digest_provider, starting_bucket, starting_prefix,
                 public_key_provider, digest_validator=None,
                 on_invalid=None, on_gap=None, on_missing=None):
        """
        :type digest_provider: DigestProvider
        :param digest_provider: DigestProvider object
        :param starting_bucket: S3 bucket where the digests are stored.
        :param starting_prefix: An optional prefix applied to each S3 key.
        :param public_key_provider: Provides public keys for a range.
        :param digest_validator: Validates digest using a validate method.
        :param on_invalid: Callback invoked when a digest is invalid.
        :param on_gap: Callback invoked when a digest has no parent, but
            there are still more digests to validate.
        :param on_missing: Callback invoked when a digest file is missing.
        """
        self.starting_bucket = starting_bucket
        self.starting_prefix = starting_prefix
        self.digest_provider = digest_provider
        self._public_key_provider = public_key_provider
        self._on_gap = on_gap
        self._on_invalid = on_invalid
        self._on_missing = on_missing
        if digest_validator is None:
            digest_validator = Sha256RSADigestValidator()
        self._digest_validator = digest_validator

    def traverse(self, start_date, end_date=None):
        """Creates and returns a generator that yields validated digest data.

        Each yielded digest dictionary contains information about the digest
        and the log file associated with the digest. Digest files are validated
        before they are yielded. Whether or not the digest is successfully
        validated is stated in the "isValid" key value pair of the yielded
        dictionary.

        :type start_date: datetime
        :param start_date: Date to start validating from (inclusive).
        :type start_date: datetime
        :param end_date: Date to stop validating at (inclusive).
        """
        if end_date is None:
            end_date = get_current_datetime()
        end_date = normalize_date(end_date)
        start_date = normalize_date(start_date)
        bucket = self.starting_bucket
        prefix = self.starting_prefix
        digests = self._load_digests(bucket, prefix, start_date, end_date)
        public_keys = self._load_public_keys(start_date, end_date)
        # Walk the digest chain from the newest digest backwards in time.
        key, end_date = self._get_last_digest(digests)
        last_start_date = end_date
        while key and start_date <= last_start_date:
            try:
                digest, end_date = self._load_and_validate_digest(
                    public_keys, bucket, key)
                last_start_date = normalize_date(
                    parse_date(digest['digestStartTime']))
                previous_bucket = digest.get('previousDigestS3Bucket', None)
                yield digest
                if previous_bucket is None:
                    # The chain is broken, so find next in digest store.
                    key, end_date = self._find_next_digest(
                        digests=digests, bucket=bucket, last_key=key,
                        last_start_date=last_start_date, cb=self._on_gap,
                        is_cb_conditional=True)
                else:
                    key = digest['previousDigestS3Object']
                    if previous_bucket != bucket:
                        bucket = previous_bucket
                        # The bucket changed so reload the digest list.
                        digests = self._load_digests(
                            bucket, prefix, start_date, end_date)
            except ClientError as e:
                if e.response['Error']['Code'] != 'NoSuchKey':
                    raise e
                # Digest in the chain was deleted; report and skip past it.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_missing,
                    message=str(e))
            except DigestError as e:
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message=str(e))
            except Exception as e:
                # Any other unexpected errors.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message='Digest file\ts3://%s/%s\tINVALID: %s'
                            % (bucket, key, str(e)))

    def _load_digests(self, bucket, prefix, start_date, end_date):
        return self.digest_provider.load_digest_keys_in_range(
            bucket=bucket, prefix=prefix,
            start_date=start_date, end_date=end_date)

    def _find_next_digest(self, digests, bucket, last_key, last_start_date,
                          cb=None, is_cb_conditional=False, message=None):
        """Finds the next digest in the bucket and invokes any callback."""
        next_key, next_end_date = self._get_last_digest(digests, last_key)
        if cb and (not is_cb_conditional or next_key):
            cb(bucket=bucket, next_key=next_key, last_key=last_key,
               next_end_date=next_end_date, last_start_date=last_start_date,
               message=message)
        return next_key, next_end_date

    def _get_last_digest(self, digests, before_key=None):
        """Finds the previous digest key (either the last or before before_key)

        If no key is provided, the last digest is used. If a digest is found,
        the end date of the provider is adjusted to match the found key's end
        date.
        """
        if not digests:
            return None, None
        elif before_key is None:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            return next_key, next_key_date
        # find a key before the given key.
        before_key_date = parse_date(extract_digest_key_date(before_key))
        while digests:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            if next_key_date < before_key_date:
                LOG.debug("Next found key: %s", next_key)
                return next_key, next_key_date
        return None, None

    def _load_and_validate_digest(self, public_keys, bucket, key):
        """Loads and validates a digest from S3.

        :param public_keys: Public key dictionary of fingerprint to dict.
        :return: Returns a tuple of the digest data as a dict and end_date
        :rtype: tuple
        """
        digest_data, digest = self.digest_provider.fetch_digest(bucket, key)
        for required_key in self.required_digest_keys:
            if required_key not in digest_data:
                raise InvalidDigestFormat(bucket, key)
        # Ensure the bucket and key are the same as what's expected.
        if digest_data['digestS3Bucket'] != bucket \
                or digest_data['digestS3Object'] != key:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: has been moved from its '
                 'original location') % (bucket, key))
        # Get the public keys in the given time range.
        fingerprint = digest_data['digestPublicKeyFingerprint']
        if fingerprint not in public_keys:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: public key not found in '
                 'region %s for fingerprint %s') %
                (bucket, key, self.digest_provider.trail_home_region,
                 fingerprint))
        public_key_hex = public_keys[fingerprint]['Value']
        self._digest_validator.validate(
            bucket, key, public_key_hex, digest_data, digest)
        end_date = normalize_date(parse_date(digest_data['digestEndTime']))
        return digest_data, end_date

    def _load_public_keys(self, start_date, end_date):
        public_keys = self._public_key_provider.get_public_keys(
            start_date, end_date)
        if not public_keys:
            raise RuntimeError(
                'No public keys found between %s and %s' %
                (format_display_date(start_date),
                 format_display_date(end_date)))
        return public_keys


class Sha256RSADigestValidator(object):
    """
    Validates SHA256withRSA signed digests.

    The result of validating the digest is inserted into the digest_data
    dictionary using the isValid key value pair.
    """

    def validate(self, bucket, key, public_key, digest_data, inflated_digest):
        """Validates a digest file.

        Throws a DigestError when the digest is invalid.

        :param bucket: Bucket of the digest file
        :param key: Key of the digest file
        :param public_key: Public key bytes.
        :param digest_data: Dict of digest data returned when JSON
            decoding a manifest.
        :param inflated_digest: Inflated digest file contents as bytes.
        """
        try:
            decoded_key = base64.b64decode(public_key)
            public_key = rsa.PublicKey.load_pkcs1(decoded_key, format='DER')
            to_sign = self._create_string_to_sign(digest_data, inflated_digest)
            signature_bytes = binascii.unhexlify(digest_data['_signature'])
            rsa.verify(to_sign, signature_bytes, public_key)
        except PyAsn1Error:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: Unable to load PKCS #1 key'
                 ' with fingerprint %s')
                % (bucket, key, digest_data['digestPublicKeyFingerprint']))
        except rsa.pkcs1.VerificationError:
            # Note from the Python-RSA docs: Never display the stack trace of
            # a rsa.pkcs1.VerificationError exception. It shows where in the
            # code the exception occurred, and thus leaks information about
            # the key.
            raise DigestSignatureError(bucket, key)

    def _create_string_to_sign(self, digest_data, inflated_digest):
        previous_signature = digest_data['previousDigestSignature']
        if previous_signature is None:
            # The value must be 'null' to match the Java implementation.
            previous_signature = 'null'
        string_to_sign = "%s\n%s/%s\n%s\n%s" % (
            digest_data['digestEndTime'],
            digest_data['digestS3Bucket'],
            digest_data['digestS3Object'],
            hashlib.sha256(inflated_digest).hexdigest(),
            previous_signature)
        LOG.debug('Digest string to sign: %s', string_to_sign)
        return string_to_sign.encode()


class CloudTrailValidateLogs(BasicCommand):
    """
    Validates log digests and log files, optionally saving them to disk.
    """
    NAME = 'validate-logs'
    DESCRIPTION = """
    Validates CloudTrail logs for a given period of time.

    This command uses the digest files delivered to your S3 bucket to perform
    the validation.

    The AWS CLI allows you to detect the following types of changes:

    - Modification or deletion of CloudTrail log files.
    - Modification or deletion of CloudTrail digest files.

    To validate log files with the AWS CLI, the following preconditions must
    be met:

    - You must have online connectivity to AWS.
    - You must have read access to the S3 bucket that contains the digest and
      log files.
    - The digest and log files must not have been moved from the original S3
      location where CloudTrail delivered them.
    - For organization trails you must have access to describe-organization to
      validate digest files

    When you disable Log File Validation, the chain of digest files is broken
    after one hour. CloudTrail will not digest log files that were delivered
    during a period in which the Log File Validation feature was disabled.
    For example, if you enable Log File Validation on January 1, disable it
    on January 2, and re-enable it on January 10, digest files will not be
    created for the log files delivered from January 3 to January 9. The same
    applies whenever you stop CloudTrail logging or delete a trail.

    .. note::

        Log files that have been downloaded to local disk cannot be validated
        with the AWS CLI. The CLI will download all log files each time this
        command is executed.

    .. note::

        This command requires that the role executing the command has
        permission to call ListObjects, GetObject, and GetBucketLocation for
        each bucket referenced by the trail.

    """

    ARG_TABLE = [
        {'name': 'trail-arn', 'required': True, 'cli_type_name': 'string',
         'help_text': 'Specifies the ARN of the trail to be validated'},
        {'name': 'start-time', 'required': True, 'cli_type_name': 'string',
         'help_text': ('Specifies that log files delivered on or after the '
                       'specified UTC timestamp value will be validated. '
                       'Example: "2015-01-08T05:21:42Z".')},
        {'name': 'end-time', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies that log files delivered on or '
                       'before the specified UTC timestamp value will be '
                       'validated. The default value is the current time. '
                       'Example: "2015-01-08T12:31:41Z".')},
        {'name': 's3-bucket', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 bucket where the digest '
                       'files are stored. If a bucket name is not specified, '
                       'the CLI will retrieve it by calling describe_trails')},
        {'name': 's3-prefix', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the optional S3 prefix where the '
                       'digest files are stored. If not specified, the CLI '
                       'will determine the prefix automatically by calling '
                       'describe_trails.')},
        {'name': 'account-id', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the account for validating logs. '
                       'This parameter is needed for organization trails '
                       'for validating logs for specific account inside an '
                       'organization')},
        {'name': 'verbose', 'cli_type_name': 'boolean',
         'action': 'store_true',
         'help_text': 'Display verbose log validation information'}
    ]

    def __init__(self, session):
        super(CloudTrailValidateLogs, self).__init__(session)
        self.trail_arn = None
        self.is_verbose = False
        self.start_time = None
        self.end_time = None
        self.s3_bucket = None
        self.s3_prefix = None
        self.s3_client_provider = None
        self.cloudtrail_client = None
        self.account_id = None
        self._source_region = None
        # Running totals reported in the summary output.
        self._valid_digests = 0
        self._invalid_digests = 0
        self._valid_logs = 0
        self._invalid_logs = 0
        # Tracks spacing state so error output is visually separated.
        self._is_last_status_double_space = True
        self._found_start_time = None
        self._found_end_time = None

    def _run_main(self, args, parsed_globals):
        self.handle_args(args)
        self.setup_services(parsed_globals)
        self._call()
        # Non-zero exit code when anything failed validation.
        if self._invalid_digests > 0 or self._invalid_logs > 0:
            return 1
        return 0

    def handle_args(self, args):
        self.trail_arn = args.trail_arn
        self.is_verbose = args.verbose
        self.s3_bucket = args.s3_bucket
        self.s3_prefix = args.s3_prefix
        self.account_id = args.account_id
        self.start_time = normalize_date(parse_date(args.start_time))
        if args.end_time:
            self.end_time = normalize_date(parse_date(args.end_time))
        else:
            self.end_time = normalize_date(get_current_datetime())
        if self.start_time > self.end_time:
            raise ValueError(('Invalid time range specified: start-time must '
                              'occur before end-time'))
        # Found start time always defaults to the given start time. This value
        # may change if the earliest found digest is after the given start
        # time. Note that the summary output report of what date ranges were
        # actually found is only shown if a valid digest is encountered,
        # thereby setting self._found_end_time to a value.
        self._found_start_time = self.start_time

    def setup_services(self, parsed_globals):
        self._source_region = parsed_globals.region
        # Use the same region as the region of the CLI to get locations.
        self.s3_client_provider = S3ClientProvider(
            self._session, self._source_region)
        client_args = {'region_name': parsed_globals.region,
                       'verify': parsed_globals.verify_ssl}
        self.organization_client = create_nested_client(
            self._session, 'organizations', **client_args)

        # endpoint_url only applies to the CloudTrail client, not the
        # Organizations client created above.
        if parsed_globals.endpoint_url is not None:
            client_args['endpoint_url'] = parsed_globals.endpoint_url
        self.cloudtrail_client = create_nested_client(
            self._session, 'cloudtrail', **client_args)

    def _call(self):
        traverser = create_digest_traverser(
            trail_arn=self.trail_arn, cloudtrail_client=self.cloudtrail_client,
            organization_client=self.organization_client,
            trail_source_region=self._source_region,
            s3_client_provider=self.s3_client_provider, bucket=self.s3_bucket,
            prefix=self.s3_prefix, on_missing=self._on_missing_digest,
            on_invalid=self._on_invalid_digest, on_gap=self._on_digest_gap,
            account_id=self.account_id)
        self._write_startup_text()
        digests = traverser.traverse(self.start_time, self.end_time)
        for digest in digests:
            # Only valid digests are yielded and only valid digests can adjust
            # the found times that are reported in the CLI output summary.
            self._track_found_times(digest)
            self._valid_digests += 1
            self._write_status(
                'Digest file\ts3://%s/%s\tvalid'
                % (digest['digestS3Bucket'], digest['digestS3Object']))
            if not digest['logFiles']:
                continue
            for log in digest['logFiles']:
                self._download_log(log)
        self._write_summary_text()

    def _track_found_times(self, digest):
        # Track the earliest found start time, but do not use a date before
        # the user supplied start date.
        digest_start_time = parse_date(digest['digestStartTime'])
        if digest_start_time > self.start_time:
            self._found_start_time = digest_start_time
        # Only use the last found end time if it is less than the
        # user supplied end time (or the current date).
        if not self._found_end_time:
            digest_end_time = parse_date(digest['digestEndTime'])
            self._found_end_time = min(digest_end_time, self.end_time)

    def _download_log(self, log):
        """ Download a log, decompress, and compare SHA256 checksums"""
        try:
            # Create a client that can work with this bucket.
            client = self.s3_client_provider.get_client(log['s3Bucket'])
            response = client.get_object(
                Bucket=log['s3Bucket'], Key=log['s3Object'])
            gzip_inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            rolling_hash = hashlib.sha256()
            # Stream the object so large logs are never fully in memory.
            for chunk in iter(lambda: response['Body'].read(2048), b""):
                data = gzip_inflater.decompress(chunk)
                rolling_hash.update(data)
            remaining_data = gzip_inflater.flush()
            if remaining_data:
                rolling_hash.update(remaining_data)
            computed_hash = rolling_hash.hexdigest()
            if computed_hash != log['hashValue']:
                self._on_log_invalid(log)
            else:
                self._valid_logs += 1
                self._write_status(('Log file\ts3://%s/%s\tvalid'
                                    % (log['s3Bucket'], log['s3Object'])))
        except ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                raise
            self._on_missing_log(log)
        except Exception:
            self._on_invalid_log_format(log)

    def _write_status(self, message, is_error=False):
        if is_error:
            if self._is_last_status_double_space:
                sys.stderr.write("%s\n\n" % message)
            else:
                sys.stderr.write("\n%s\n\n" % message)
            self._is_last_status_double_space = True
        elif self.is_verbose:
            self._is_last_status_double_space = False
            sys.stdout.write("%s\n" % message)

    def _write_startup_text(self):
        sys.stdout.write(
            'Validating log files for trail %s between %s and %s\n\n'
            % (self.trail_arn, format_display_date(self.start_time),
               format_display_date(self.end_time)))

    def _write_summary_text(self):
        if not self._is_last_status_double_space:
            sys.stdout.write('\n')
        sys.stdout.write('Results requested for %s to %s\n'
                         % (format_display_date(self.start_time),
                            format_display_date(self.end_time)))
        if not self._valid_digests and not self._invalid_digests:
            sys.stdout.write('No digests found\n')
            return
        if not self._found_start_time or not self._found_end_time:
            sys.stdout.write('No valid digests found in range\n')
        else:
            sys.stdout.write('Results found for %s to %s:\n'
                             % (format_display_date(self._found_start_time),
                                format_display_date(self._found_end_time)))
        self._write_ratio(self._valid_digests, self._invalid_digests, 'digest')
        self._write_ratio(self._valid_logs, self._invalid_logs, 'log')
        sys.stdout.write('\n')

    def _write_ratio(self, valid, invalid, name):
        total = valid + invalid
        if total > 0:
            sys.stdout.write('\n%d/%d %s files valid' % (valid, total, name))
            if invalid > 0:
                sys.stdout.write(', %d/%d %s files INVALID' % (invalid, total,
                                                               name))

    def _on_missing_digest(self, bucket, last_key, **kwargs):
        self._invalid_digests += 1
        self._write_status('Digest file\ts3://%s/%s\tINVALID: not found'
                           % (bucket, last_key), True)

    def _on_digest_gap(self, **kwargs):
        self._write_status(
            'No log files were delivered by CloudTrail between %s and %s'
            % (format_display_date(kwargs['next_end_date']),
               format_display_date(kwargs['last_start_date'])), True)

    def _on_invalid_digest(self, message, **kwargs):
        self._invalid_digests += 1
        self._write_status(message, True)

    def _on_invalid_log_format(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            ('Log file\ts3://%s/%s\tINVALID: invalid format'
             % (log_data['s3Bucket'], log_data['s3Object'])), True)

    def _on_log_invalid(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            "Log file\ts3://%s/%s\tINVALID: hash value doesn't match"
            % (log_data['s3Bucket'], log_data['s3Object']), True)

    def _on_missing_log(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            'Log file\ts3://%s/%s\tINVALID: not found'
            % (log_data['s3Bucket'], log_data['s3Object']), True)