# Source: blob/develop/awscli/customizations/cloudtrail/validation.py
# (2631 views)
# Copyright 2012-2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
import base64
import binascii
import json
import hashlib
import logging
import re
import sys
import zlib
from zlib import error as ZLibError
from datetime import timedelta
from dateutil import tz, parser

from pyasn1.error import PyAsn1Error
import rsa

from awscli.customizations.cloudtrail.utils import get_trail_by_arn, \
    get_account_id_from_arn
from awscli.customizations.commands import BasicCommand
from botocore.exceptions import ClientError
from awscli.compat import get_current_datetime
from awscli.schema import ParameterRequiredError
from awscli.utils import create_nested_client

LOG = logging.getLogger(__name__)
DATE_FORMAT = '%Y%m%dT%H%M%SZ'
DISPLAY_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'


def format_date(date):
    """Returns a formatted date string in a CloudTrail date format"""
    return date.strftime(DATE_FORMAT)


def format_display_date(date):
    """Returns a formatted date string meant for CLI output"""
    return date.strftime(DISPLAY_DATE_FORMAT)


def normalize_date(date):
    """Returns a normalized date using a UTC timezone"""
    return date.replace(tzinfo=tz.tzutc())


def extract_digest_key_date(digest_s3_key):
    """Extract the timestamp portion of a manifest file.

    Manifest file names take the following form:
    AWSLogs/{account}/CloudTrail-Digest/{region}/{ymd}/{account}_CloudTrail \
    -Digest_{region}_{name}_region_{date}.json.gz
    """
    return digest_s3_key[-24:-8]


def parse_date(date_string):
    """Parses a date string, raising ValueError with a friendlier message."""
    try:
        return parser.parse(date_string)
    except ValueError:
        raise ValueError('Unable to parse date value: %s' % date_string)


def assert_cloudtrail_arn_is_valid(trail_arn):
    """Ensures that the arn looks correct.

    ARNs look like: arn:aws:cloudtrail:us-east-1:123456789012:trail/foo"""
    pattern = re.compile(r'arn:.+:cloudtrail:.+:\d{12}:trail/.+')
    if not pattern.match(trail_arn):
        raise ValueError('Invalid trail ARN provided: %s' % trail_arn)


def create_digest_traverser(
    cloudtrail_client,
    organization_client,
    s3_client_provider,
    trail_arn,
    trail_source_region=None,
    on_invalid=None,
    on_gap=None,
    on_missing=None,
    bucket=None,
    prefix=None,
    account_id=None,
):
    """Creates a CloudTrail DigestTraverser and its object graph.

    :type cloudtrail_client: botocore.client.CloudTrail
    :param cloudtrail_client: Client used to connect to CloudTrail
    :type organization_client: botocore.client.organizations
    :param organization_client: Client used to connect to Organizations
    :type s3_client_provider: S3ClientProvider
    :param s3_client_provider: Used to create Amazon S3 client per/region.
    :param trail_arn: CloudTrail trail ARN
    :param trail_source_region: The scanned region of a trail.
    :param on_invalid: Callback that is invoked when validating a digest fails.
    :param on_gap: Callback that is invoked when a digest has no link to the
        previous digest, but there are more digests to validate. This can
        happen when a trail is disabled for a period of time.
    :param on_missing: Callback that is invoked when a digest file has been
        deleted from Amazon S3 but is supposed to be present.
    :param bucket: Amazon S3 bucket of the trail if it is different than the
        bucket that is currently associated with the trail.
    :param prefix: bucket: Key prefix prepended to each digest and log placed
        in the Amazon S3 bucket if it is different than the prefix that is
        currently associated with the trail.
    :param account_id: The account id for which the digest files are
        validated. For normal trails this is the caller account, for
        organization trails it is the member account.

    ``on_gap``, ``on_invalid``, and ``on_missing`` callbacks are invoked with
    the following named arguments:

    - ``bucket``: The next S3 bucket.
    - ``next_key``: (optional) Next digest key that was found in the bucket.
    - ``next_end_date``: (optional) End date of the next found digest.
    - ``last_key``: The last digest key that was found.
    - ``last_start_date``: (optional) Start date of last found digest.
    - ``message``: (optional) Message string about the notification.
    """
    assert_cloudtrail_arn_is_valid(trail_arn)
    organization_id = None
    if bucket is None:
        # Determine the bucket and prefix based on the trail arn.
        trail_info = get_trail_by_arn(cloudtrail_client, trail_arn)
        LOG.debug('Loaded trail info: %s', trail_info)
        bucket = trail_info['S3BucketName']
        prefix = trail_info.get('S3KeyPrefix', None)
        is_org_trail = trail_info.get('IsOrganizationTrail')
        if is_org_trail:
            if not account_id:
                raise ParameterRequiredError(
                    "Missing required parameter for organization "
                    "trail: '--account-id'")
            organization_id = organization_client.describe_organization()[
                'Organization']['Id']

    # Determine the region from the ARN (e.g., arn:aws:cloudtrail:REGION:...)
    trail_region = trail_arn.split(':')[3]
    # Determine the name from the ARN (the last part after "/")
    trail_name = trail_arn.split('/')[-1]
    # If account id is not specified parse it from trail ARN
    if not account_id:
        account_id = get_account_id_from_arn(trail_arn)

    digest_provider = DigestProvider(
        account_id=account_id, trail_name=trail_name,
        s3_client_provider=s3_client_provider,
        trail_source_region=trail_source_region,
        trail_home_region=trail_region,
        organization_id=organization_id)
    return DigestTraverser(
        digest_provider=digest_provider, starting_bucket=bucket,
        starting_prefix=prefix, on_invalid=on_invalid, on_gap=on_gap,
        on_missing=on_missing,
        public_key_provider=PublicKeyProvider(cloudtrail_client))


class S3ClientProvider(object):
    """Creates Amazon S3 clients and determines the region name of a client.

    This class will cache the location constraints of previously requested
    buckets and cache previously created clients for the same region.
    """
    def __init__(self, session, get_bucket_location_region='us-east-1'):
        self._session = session
        self._get_bucket_location_region = get_bucket_location_region
        self._client_cache = {}
        self._region_cache = {}

    def get_client(self, bucket_name):
        """Creates an S3 client that can work with the given bucket name"""
        region_name = self._get_bucket_region(bucket_name)
        return self._create_client(region_name)

    def _get_bucket_region(self, bucket_name):
        """Returns the region of a bucket"""
        if bucket_name not in self._region_cache:
            client = self._create_client(self._get_bucket_location_region)
            result = client.get_bucket_location(Bucket=bucket_name)
            # S3 returns a null LocationConstraint for us-east-1 buckets.
            region = result['LocationConstraint'] or 'us-east-1'
            self._region_cache[bucket_name] = region
        return self._region_cache[bucket_name]

    def _create_client(self, region_name):
        """Creates an Amazon S3 client for the given region name"""
        if region_name not in self._client_cache:
            client = create_nested_client(
                self._session, 's3', region_name=region_name)
            # Remove the CLI error event that prevents exceptions.
            self._client_cache[region_name] = client
        return self._client_cache[region_name]


class DigestError(ValueError):
    """Exception raised when a digest fails to validate"""
    pass


class DigestSignatureError(DigestError):
    """Exception raised when a digest signature is invalid"""
    def __init__(self, bucket, key):
        message = ('Digest file\ts3://%s/%s\tINVALID: signature verification '
                   'failed') % (bucket, key)
        super(DigestSignatureError, self).__init__(message)


class InvalidDigestFormat(DigestError):
    """Exception raised when a digest has an invalid format"""
    def __init__(self, bucket, key):
        message = 'Digest file\ts3://%s/%s\tINVALID: invalid format' % (bucket,
                                                                        key)
        super(InvalidDigestFormat, self).__init__(message)


class PublicKeyProvider(object):
    """Retrieves public keys from CloudTrail within a date range."""
    def __init__(self, cloudtrail_client):
        self._cloudtrail_client = cloudtrail_client

    def get_public_keys(self, start_date, end_date):
        """Loads public keys in a date range into a returned dict.

        :type start_date: datetime
        :param start_date: Start date of a date range.
        :type end_date: datetime
        :param end_date: End date of a date range.
        :rtype: dict
        :return: Returns a dict where each key is the fingerprint of the
            public key, and each value is a dict of public key data.
        """
        public_keys = self._cloudtrail_client.list_public_keys(
            StartTime=start_date, EndTime=end_date)
        public_keys_in_range = public_keys['PublicKeyList']
        LOG.debug('Loaded public keys in range: %s', public_keys_in_range)
        return dict((key['Fingerprint'], key) for key in public_keys_in_range)


class DigestProvider(object):
    """
    Retrieves digest keys and digests from Amazon S3.

    This class is responsible for determining the full list of digest files
    in a bucket and loading digests from the bucket into a JSON decoded
    dict. This class is not responsible for validation or iterating from
    one digest to the next.
    """

    def __init__(
        self,
        s3_client_provider,
        account_id,
        trail_name,
        trail_home_region,
        trail_source_region=None,
        organization_id=None,
    ):
        self._client_provider = s3_client_provider
        self.trail_name = trail_name
        self.account_id = account_id
        self.trail_home_region = trail_home_region
        self.trail_source_region = trail_source_region or trail_home_region
        self.organization_id = organization_id

    def load_digest_keys_in_range(self, bucket, prefix, start_date, end_date):
        """Returns a list of digest keys in the date range.

        This method uses a list_objects API call and provides a Marker
        parameter that is calculated based on the start_date provided.
        Amazon S3 then returns all keys in the bucket that start after
        the given key (non-inclusive). We then iterate over the keys
        until the date extracted from the yielded keys is greater than
        the given end_date.
        """
        digests = []
        marker = self._create_digest_key(start_date, prefix)
        s3_digest_files_prefix = self._create_digest_prefix(start_date, prefix)
        client = self._client_provider.get_client(bucket)
        paginator = client.get_paginator('list_objects')
        page_iterator = paginator.paginate(
            Bucket=bucket, Marker=marker, Prefix=s3_digest_files_prefix)
        key_filter = page_iterator.search('Contents[*].Key')
        # Create a target start and end date
        target_start_date = format_date(normalize_date(start_date))
        # Add one hour to the end_date to get logs that spilled over to next.
        target_end_date = format_date(
            normalize_date(end_date + timedelta(hours=1)))
        # Ensure digests are from the same trail.
        digest_key_regex = re.compile(self._create_digest_key_regex(prefix))
        for key in key_filter:
            if key and digest_key_regex.match(key):
                # Use a lexicographic comparison to know when to stop.
                extracted_date = extract_digest_key_date(key)
                if extracted_date > target_end_date:
                    break
                # Only append digests after the start date.
                if extracted_date >= target_start_date:
                    digests.append(key)
        return digests

    def fetch_digest(self, bucket, key):
        """Loads a digest by key from S3.

        Returns the JSON decode data and GZIP inflated raw content.
        """
        client = self._client_provider.get_client(bucket)
        result = client.get_object(Bucket=bucket, Key=key)
        try:
            # wbits of MAX_WBITS | 16 tells zlib to expect a gzip header.
            digest = zlib.decompress(result['Body'].read(),
                                     zlib.MAX_WBITS | 16)
            digest_data = json.loads(digest.decode())
        except (ValueError, ZLibError):
            # Cannot gzip decode or JSON parse.
            raise InvalidDigestFormat(bucket, key)
        # Add the expected digest signature and algorithm to the dict.
        if 'signature' not in result['Metadata'] \
                or 'signature-algorithm' not in result['Metadata']:
            raise DigestSignatureError(bucket, key)
        digest_data['_signature'] = result['Metadata']['signature']
        digest_data['_signature_algorithm'] = \
            result['Metadata']['signature-algorithm']
        return digest_data, digest

    def _create_digest_key(self, start_date, key_prefix):
        """Computes an Amazon S3 key based on the provided data.

        The computed is what would have been placed in the S3 bucket if
        a log digest were created at a specific time. This computed key
        does not have to actually exist as it will only be used to as
        a Marker parameter in a list_objects call.

        :return: Returns a computed key as a string.
        """
        # Subtract one minute to ensure the dates are inclusive.
        date = start_date - timedelta(minutes=1)
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'date': format_date(date),
            'ymd': date.strftime('%Y/%m/%d'),
            'source_region': self.trail_source_region,
            'home_region': self.trail_home_region,
            'name': self.trail_name
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail-Digest/{source_region}/'
            '{ymd}/{account_id}_CloudTrail-Digest_{source_region}_{name}_'
            '{home_region}_{date}.json.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = key_prefix + '/' + key
        return key

    def _create_digest_prefix(self, start_date, key_prefix):
        """Creates an S3 prefix to scope listing to trail's region.

        :return: Returns a prefix string to limit S3 listing scope.
        """
        template = 'AWSLogs/'
        template_params = {
            'account_id': self.account_id,
            'source_region': self.trail_source_region
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += '{account_id}/CloudTrail-Digest/{source_region}'
        prefix = template.format(**template_params)
        if key_prefix:
            prefix = key_prefix + '/' + prefix
        return prefix

    def _create_digest_key_regex(self, key_prefix):
        """Creates a regular expression used to match against S3 keys"""
        template = 'AWSLogs/'
        template_params = {
            'account_id': re.escape(self.account_id),
            'source_region': re.escape(self.trail_source_region),
            'home_region': re.escape(self.trail_home_region),
            'name': re.escape(self.trail_name)
        }
        if self.organization_id:
            template += '{organization_id}/'
            template_params['organization_id'] = self.organization_id
        template += (
            '{account_id}/CloudTrail\\-Digest/{source_region}/'
            '\\d+/\\d+/\\d+/{account_id}_CloudTrail\\-Digest_'
            '{source_region}_{name}_{home_region}_.+\\.json\\.gz'
        )
        key = template.format(**template_params)
        if key_prefix:
            key = re.escape(key_prefix) + '/' + key
        return '^' + key + '$'


class DigestTraverser(object):
    """Retrieves and validates digests within a date range."""
    # These keys are required to be present before validating the contents
    # of a digest.
    required_digest_keys = ['digestPublicKeyFingerprint', 'digestS3Bucket',
                            'digestS3Object', 'previousDigestSignature',
                            'digestEndTime', 'digestStartTime']

    def __init__(self, digest_provider, starting_bucket, starting_prefix,
                 public_key_provider, digest_validator=None,
                 on_invalid=None, on_gap=None, on_missing=None):
        """
        :type digest_provider: DigestProvider
        :param digest_provider: DigestProvider object
        :param starting_bucket: S3 bucket where the digests are stored.
        :param starting_prefix: An optional prefix applied to each S3 key.
        :param public_key_provider: Provides public keys for a range.
        :param digest_validator: Validates digest using a validate method.
        :param on_invalid: Callback invoked when a digest is invalid.
        :param on_gap: Callback invoked when a digest has no parent, but
            there are still more digests to validate.
        :param on_missing: Callback invoked when a digest file is missing.
        """
        self.starting_bucket = starting_bucket
        self.starting_prefix = starting_prefix
        self.digest_provider = digest_provider
        self._public_key_provider = public_key_provider
        self._on_gap = on_gap
        self._on_invalid = on_invalid
        self._on_missing = on_missing
        if digest_validator is None:
            digest_validator = Sha256RSADigestValidator()
        self._digest_validator = digest_validator

    def traverse(self, start_date, end_date=None):
        """Creates and returns a generator that yields validated digest data.

        Each yielded digest dictionary contains information about the digest
        and the log file associated with the digest. Digest files are validated
        before they are yielded. Whether or not the digest is successfully
        validated is stated in the "isValid" key value pair of the yielded
        dictionary.

        :type start_date: datetime
        :param start_date: Date to start validating from (inclusive).
        :type end_date: datetime
        :param end_date: Date to stop validating at (inclusive).
        """
        if end_date is None:
            end_date = get_current_datetime()
        end_date = normalize_date(end_date)
        start_date = normalize_date(start_date)
        bucket = self.starting_bucket
        prefix = self.starting_prefix
        digests = self._load_digests(bucket, prefix, start_date, end_date)
        public_keys = self._load_public_keys(start_date, end_date)
        key, end_date = self._get_last_digest(digests)
        last_start_date = end_date
        while key and start_date <= last_start_date:
            try:
                digest, end_date = self._load_and_validate_digest(
                    public_keys, bucket, key)
                last_start_date = normalize_date(
                    parse_date(digest['digestStartTime']))
                previous_bucket = digest.get('previousDigestS3Bucket', None)
                yield digest
                if previous_bucket is None:
                    # The chain is broken, so find next in digest store.
                    key, end_date = self._find_next_digest(
                        digests=digests, bucket=bucket, last_key=key,
                        last_start_date=last_start_date, cb=self._on_gap,
                        is_cb_conditional=True)
                else:
                    key = digest['previousDigestS3Object']
                    if previous_bucket != bucket:
                        bucket = previous_bucket
                        # The bucket changed so reload the digest list.
                        digests = self._load_digests(
                            bucket, prefix, start_date, end_date)
            except ClientError as e:
                if e.response['Error']['Code'] != 'NoSuchKey':
                    raise e
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_missing,
                    message=str(e))
            except DigestError as e:
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message=str(e))
            except Exception as e:
                # Any other unexpected errors.
                key, end_date = self._find_next_digest(
                    digests=digests, bucket=bucket, last_key=key,
                    last_start_date=last_start_date, cb=self._on_invalid,
                    message='Digest file\ts3://%s/%s\tINVALID: %s'
                            % (bucket, key, str(e)))

    def _load_digests(self, bucket, prefix, start_date, end_date):
        return self.digest_provider.load_digest_keys_in_range(
            bucket=bucket, prefix=prefix,
            start_date=start_date, end_date=end_date)

    def _find_next_digest(self, digests, bucket, last_key, last_start_date,
                          cb=None, is_cb_conditional=False, message=None):
        """Finds the next digest in the bucket and invokes any callback."""
        next_key, next_end_date = self._get_last_digest(digests, last_key)
        if cb and (not is_cb_conditional or next_key):
            cb(bucket=bucket, next_key=next_key, last_key=last_key,
               next_end_date=next_end_date, last_start_date=last_start_date,
               message=message)
        return next_key, next_end_date

    def _get_last_digest(self, digests, before_key=None):
        """Finds the previous digest key (either the last or before before_key)

        If no key is provided, the last digest is used. If a digest is found,
        the end date of the provider is adjusted to match the found key's end
        date.
        """
        if not digests:
            return None, None
        elif before_key is None:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            return next_key, next_key_date
        # find a key before the given key.
        before_key_date = parse_date(extract_digest_key_date(before_key))
        while digests:
            next_key = digests.pop()
            next_key_date = normalize_date(
                parse_date(extract_digest_key_date(next_key)))
            if next_key_date < before_key_date:
                LOG.debug("Next found key: %s", next_key)
                return next_key, next_key_date
        return None, None

    def _load_and_validate_digest(self, public_keys, bucket, key):
        """Loads and validates a digest from S3.

        :param public_keys: Public key dictionary of fingerprint to dict.
        :return: Returns a tuple of the digest data as a dict and end_date
        :rtype: tuple
        """
        digest_data, digest = self.digest_provider.fetch_digest(bucket, key)
        for required_key in self.required_digest_keys:
            if required_key not in digest_data:
                raise InvalidDigestFormat(bucket, key)
        # Ensure the bucket and key are the same as what's expected.
        if digest_data['digestS3Bucket'] != bucket \
                or digest_data['digestS3Object'] != key:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: has been moved from its '
                 'original location') % (bucket, key))
        # Get the public keys in the given time range.
        fingerprint = digest_data['digestPublicKeyFingerprint']
        if fingerprint not in public_keys:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: public key not found in '
                 'region %s for fingerprint %s') %
                (bucket, key, self.digest_provider.trail_home_region,
                 fingerprint))
        public_key_hex = public_keys[fingerprint]['Value']
        self._digest_validator.validate(
            bucket, key, public_key_hex, digest_data, digest)
        end_date = normalize_date(parse_date(digest_data['digestEndTime']))
        return digest_data, end_date

    def _load_public_keys(self, start_date, end_date):
        public_keys = self._public_key_provider.get_public_keys(
            start_date, end_date)
        if not public_keys:
            raise RuntimeError(
                'No public keys found between %s and %s' %
                (format_display_date(start_date),
                 format_display_date(end_date)))
        return public_keys


class Sha256RSADigestValidator(object):
    """
    Validates SHA256withRSA signed digests.

    The result of validating the digest is inserted into the digest_data
    dictionary using the isValid key value pair.
    """

    def validate(self, bucket, key, public_key, digest_data, inflated_digest):
        """Validates a digest file.

        Throws a DigestError when the digest is invalid.

        :param bucket: Bucket of the digest file
        :param key: Key of the digest file
        :param public_key: Public key bytes.
        :param digest_data: Dict of digest data returned when JSON
            decoding a manifest.
        :param inflated_digest: Inflated digest file contents as bytes.
        """
        try:
            decoded_key = base64.b64decode(public_key)
            public_key = rsa.PublicKey.load_pkcs1(decoded_key, format='DER')
            to_sign = self._create_string_to_sign(digest_data, inflated_digest)
            signature_bytes = binascii.unhexlify(digest_data['_signature'])
            rsa.verify(to_sign, signature_bytes, public_key)
        except PyAsn1Error:
            raise DigestError(
                ('Digest file\ts3://%s/%s\tINVALID: Unable to load PKCS #1 key'
                 ' with fingerprint %s')
                % (bucket, key, digest_data['digestPublicKeyFingerprint']))
        except rsa.pkcs1.VerificationError:
            # Note from the Python-RSA docs: Never display the stack trace of
            # a rsa.pkcs1.VerificationError exception. It shows where in the
            # code the exception occurred, and thus leaks information about
            # the key.
            raise DigestSignatureError(bucket, key)

    def _create_string_to_sign(self, digest_data, inflated_digest):
        previous_signature = digest_data['previousDigestSignature']
        if previous_signature is None:
            # The value must be 'null' to match the Java implementation.
            previous_signature = 'null'
        string_to_sign = "%s\n%s/%s\n%s\n%s" % (
            digest_data['digestEndTime'],
            digest_data['digestS3Bucket'],
            digest_data['digestS3Object'],
            hashlib.sha256(inflated_digest).hexdigest(),
            previous_signature)
        LOG.debug('Digest string to sign: %s', string_to_sign)
        return string_to_sign.encode()


class CloudTrailValidateLogs(BasicCommand):
    """
    Validates log digests and log files, optionally saving them to disk.
    """
    NAME = 'validate-logs'
    DESCRIPTION = """
    Validates CloudTrail logs for a given period of time.

    This command uses the digest files delivered to your S3 bucket to perform
    the validation.

    The AWS CLI allows you to detect the following types of changes:

    - Modification or deletion of CloudTrail log files.
    - Modification or deletion of CloudTrail digest files.

    To validate log files with the AWS CLI, the following preconditions must
    be met:

    - You must have online connectivity to AWS.
    - You must have read access to the S3 bucket that contains the digest and
      log files.
    - The digest and log files must not have been moved from the original S3
      location where CloudTrail delivered them.
    - For organization trails you must have access to describe-organization to
      validate digest files

    When you disable Log File Validation, the chain of digest files is broken
    after one hour. CloudTrail will not digest log files that were delivered
    during a period in which the Log File Validation feature was disabled.
    For example, if you enable Log File Validation on January 1, disable it
    on January 2, and re-enable it on January 10, digest files will not be
    created for the log files delivered from January 3 to January 9. The same
    applies whenever you stop CloudTrail logging or delete a trail.

    .. note::

        Log files that have been downloaded to local disk cannot be validated
        with the AWS CLI. The CLI will download all log files each time this
        command is executed.

    .. note::

        This command requires that the role executing the command has
        permission to call ListObjects, GetObject, and GetBucketLocation for
        each bucket referenced by the trail.

    """

    ARG_TABLE = [
        {'name': 'trail-arn', 'required': True, 'cli_type_name': 'string',
         'help_text': 'Specifies the ARN of the trail to be validated'},
        {'name': 'start-time', 'required': True, 'cli_type_name': 'string',
         'help_text': ('Specifies that log files delivered on or after the '
                       'specified UTC timestamp value will be validated. '
                       'Example: "2015-01-08T05:21:42Z".')},
        {'name': 'end-time', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies that log files delivered on or '
                       'before the specified UTC timestamp value will be '
                       'validated. The default value is the current time. '
                       'Example: "2015-01-08T12:31:41Z".')},
        {'name': 's3-bucket', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the S3 bucket where the digest '
                       'files are stored. If a bucket name is not specified, '
                       'the CLI will retrieve it by calling describe_trails')},
        {'name': 's3-prefix', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the optional S3 prefix where the '
                       'digest files are stored. If not specified, the CLI '
                       'will determine the prefix automatically by calling '
                       'describe_trails.')},
        {'name': 'account-id', 'cli_type_name': 'string',
         'help_text': ('Optionally specifies the account for validating logs. '
                       'This parameter is needed for organization trails '
                       'for validating logs for specific account inside an '
                       'organization')},
        {'name': 'verbose', 'cli_type_name': 'boolean',
         'action': 'store_true',
         'help_text': 'Display verbose log validation information'}
    ]

    def __init__(self, session):
        super(CloudTrailValidateLogs, self).__init__(session)
        self.trail_arn = None
        self.is_verbose = False
        self.start_time = None
        self.end_time = None
        self.s3_bucket = None
        self.s3_prefix = None
        self.s3_client_provider = None
        self.cloudtrail_client = None
        self.account_id = None
        self._source_region = None
        self._valid_digests = 0
        self._invalid_digests = 0
        self._valid_logs = 0
        self._invalid_logs = 0
        self._is_last_status_double_space = True
        self._found_start_time = None
        self._found_end_time = None

    def _run_main(self, args, parsed_globals):
        self.handle_args(args)
        self.setup_services(parsed_globals)
        self._call()
        if self._invalid_digests > 0 or self._invalid_logs > 0:
            return 1
        return 0

    def handle_args(self, args):
        self.trail_arn = args.trail_arn
        self.is_verbose = args.verbose
        self.s3_bucket = args.s3_bucket
        self.s3_prefix = args.s3_prefix
        self.account_id = args.account_id
        self.start_time = normalize_date(parse_date(args.start_time))
        if args.end_time:
            self.end_time = normalize_date(parse_date(args.end_time))
        else:
            self.end_time = normalize_date(get_current_datetime())
        if self.start_time > self.end_time:
            raise ValueError(('Invalid time range specified: start-time must '
                              'occur before end-time'))
        # Found start time always defaults to the given start time. This value
        # may change if the earliest found digest is after the given start
        # time. Note that the summary output report of what date ranges were
        # actually found is only shown if a valid digest is encountered,
        # thereby setting self._found_end_time to a value.
        self._found_start_time = self.start_time

    def setup_services(self, parsed_globals):
        self._source_region = parsed_globals.region
        # Use the same region as the region of the CLI to get locations.
        self.s3_client_provider = S3ClientProvider(
            self._session, self._source_region)
        client_args = {'region_name': parsed_globals.region,
                       'verify': parsed_globals.verify_ssl}
        self.organization_client = create_nested_client(
            self._session, 'organizations', **client_args)

        if parsed_globals.endpoint_url is not None:
            client_args['endpoint_url'] = parsed_globals.endpoint_url
        self.cloudtrail_client = create_nested_client(
            self._session, 'cloudtrail', **client_args)

    def _call(self):
        traverser = create_digest_traverser(
            trail_arn=self.trail_arn, cloudtrail_client=self.cloudtrail_client,
            organization_client=self.organization_client,
            trail_source_region=self._source_region,
            s3_client_provider=self.s3_client_provider, bucket=self.s3_bucket,
            prefix=self.s3_prefix, on_missing=self._on_missing_digest,
            on_invalid=self._on_invalid_digest, on_gap=self._on_digest_gap,
            account_id=self.account_id)
        self._write_startup_text()
        digests = traverser.traverse(self.start_time, self.end_time)
        for digest in digests:
            # Only valid digests are yielded and only valid digests can adjust
            # the found times that are reported in the CLI output summary.
            self._track_found_times(digest)
            self._valid_digests += 1
            self._write_status(
                'Digest file\ts3://%s/%s\tvalid'
                % (digest['digestS3Bucket'], digest['digestS3Object']))
            if not digest['logFiles']:
                continue
            for log in digest['logFiles']:
                self._download_log(log)
        self._write_summary_text()

    def _track_found_times(self, digest):
        # Track the earliest found start time, but do not use a date before
        # the user supplied start date.
        digest_start_time = parse_date(digest['digestStartTime'])
        if digest_start_time > self.start_time:
            self._found_start_time = digest_start_time
        # Only use the last found end time if it is less than the
        # user supplied end time (or the current date).
        if not self._found_end_time:
            digest_end_time = parse_date(digest['digestEndTime'])
            self._found_end_time = min(digest_end_time, self.end_time)

    def _download_log(self, log):
        """ Download a log, decompress, and compare SHA256 checksums"""
        try:
            # Create a client that can work with this bucket.
            client = self.s3_client_provider.get_client(log['s3Bucket'])
            response = client.get_object(
                Bucket=log['s3Bucket'], Key=log['s3Object'])
            gzip_inflater = zlib.decompressobj(zlib.MAX_WBITS | 16)
            rolling_hash = hashlib.sha256()
            for chunk in iter(lambda: response['Body'].read(2048), b""):
                data = gzip_inflater.decompress(chunk)
                rolling_hash.update(data)
            remaining_data = gzip_inflater.flush()
            if remaining_data:
                rolling_hash.update(remaining_data)
            computed_hash = rolling_hash.hexdigest()
            if computed_hash != log['hashValue']:
                self._on_log_invalid(log)
            else:
                self._valid_logs += 1
                self._write_status(('Log file\ts3://%s/%s\tvalid'
                                    % (log['s3Bucket'], log['s3Object'])))
        except ClientError as e:
            if e.response['Error']['Code'] != 'NoSuchKey':
                raise
            self._on_missing_log(log)
        except Exception:
            self._on_invalid_log_format(log)

    def _write_status(self, message, is_error=False):
        if is_error:
            if self._is_last_status_double_space:
                sys.stderr.write("%s\n\n" % message)
            else:
                sys.stderr.write("\n%s\n\n" % message)
            self._is_last_status_double_space = True
        elif self.is_verbose:
            self._is_last_status_double_space = False
            sys.stdout.write("%s\n" % message)

    def _write_startup_text(self):
        sys.stdout.write(
            'Validating log files for trail %s between %s and %s\n\n'
            % (self.trail_arn, format_display_date(self.start_time),
               format_display_date(self.end_time)))

    def _write_summary_text(self):
        if not self._is_last_status_double_space:
            sys.stdout.write('\n')
        sys.stdout.write('Results requested for %s to %s\n'
                         % (format_display_date(self.start_time),
                            format_display_date(self.end_time)))
        if not self._valid_digests and not self._invalid_digests:
            sys.stdout.write('No digests found\n')
            return
        if not self._found_start_time or not self._found_end_time:
            sys.stdout.write('No valid digests found in range\n')
        else:
            sys.stdout.write('Results found for %s to %s:\n'
                             % (format_display_date(self._found_start_time),
                                format_display_date(self._found_end_time)))
        self._write_ratio(self._valid_digests, self._invalid_digests, 'digest')
        self._write_ratio(self._valid_logs, self._invalid_logs, 'log')
        sys.stdout.write('\n')

    def _write_ratio(self, valid, invalid, name):
        total = valid + invalid
        if total > 0:
            sys.stdout.write('\n%d/%d %s files valid' % (valid, total, name))
            if invalid > 0:
                sys.stdout.write(', %d/%d %s files INVALID' % (invalid, total,
                                                               name))

    def _on_missing_digest(self, bucket, last_key, **kwargs):
        self._invalid_digests += 1
        self._write_status('Digest file\ts3://%s/%s\tINVALID: not found'
                           % (bucket, last_key), True)

    def _on_digest_gap(self, **kwargs):
        self._write_status(
            'No log files were delivered by CloudTrail between %s and %s'
            % (format_display_date(kwargs['next_end_date']),
               format_display_date(kwargs['last_start_date'])), True)

    def _on_invalid_digest(self, message, **kwargs):
        self._invalid_digests += 1
        self._write_status(message, True)

    def _on_invalid_log_format(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            ('Log file\ts3://%s/%s\tINVALID: invalid format'
             % (log_data['s3Bucket'], log_data['s3Object'])), True)

    def _on_log_invalid(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            "Log file\ts3://%s/%s\tINVALID: hash value doesn't match"
            % (log_data['s3Bucket'], log_data['s3Object']), True)

    def _on_missing_log(self, log_data):
        self._invalid_logs += 1
        self._write_status(
            'Log file\ts3://%s/%s\tINVALID: not found'
            % (log_data['s3Bucket'], log_data['s3Object']), True)