# Copyright 2017-2019 Manheim / Cox Automotive
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Script to clean up the custodian S3 buckets by moving logs from any deleted
policies to an "archived-logs/" prefix.
"""
import sys
import logging
import argparse

import boto3
import yaml
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader
from manheim_c7n_tools.utils import set_log_info, set_log_debug
from manheim_c7n_tools.version import VERSION
logger = logging.getLogger(__name__)


class S3Archiver(object):
    """Archive S3 logs for any policies no longer present in the config."""

    def __init__(self, region_name, bucket_name, conf_file, dryrun=False):
        logger.info('Connecting to S3 in %s for bucket %s (config file: %s)',
                    region_name, bucket_name, conf_file)
        self._s3 = boto3.resource('s3', region_name=region_name)
        self._region_name = region_name
        self._bucket_name = bucket_name
        self._bucket = self._s3.Bucket(bucket_name)
        self._conf_file = conf_file
        self._dryrun = dryrun
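        # NOTE: boto3 picks up credentials from its standard chain (env
        # vars, shared credentials file, instance profile, etc.); only the
        # region is set explicitly here.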

    def run(self):
        policy_names = self._get_policy_names()
        logger.debug('Found %d policies in config', len(policy_names))
        prefixes = self._get_s3_policy_prefixes()
        logger.debug('Found %d policy prefixes in S3 bucket', len(prefixes))
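        # Any prefix in S3 that no longer corresponds to a policy in the
        # config file belongs to a deleted policy, so archive it.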
        move_count = 0
        for p in prefixes:
            if p not in policy_names:
                self._move_prefix(p)
                move_count += 1
        logger.info('Archived %d policy name prefixes', move_count)

    def _move_prefix(self, policy_name):
        """
        Given a policy name key prefix in S3, move everything under it to the
        ``archived-logs/`` prefix.

        :param policy_name: name of the policy prefix in S3
        :type policy_name: str
        """
        if not self._dryrun:
            logger.info('Moving policy prefix logs/%s to archived-logs/%s',
                        policy_name, policy_name)
        count = 0
        for o in self._bucket.objects.filter(
            Prefix='logs/%s/' % policy_name
        ):
            # replace only the leading 'logs/' prefix, in case that string
            # also appears later in the key
            dest = o.key.replace('logs/', 'archived-logs/', 1)
            self._s3_move_file(o, dest)
            count += 1
        if self._dryrun:
            logger.info('DRYRUN: Would move %d objects under logs/%s to '
                        'archived-logs/%s', count, policy_name, policy_name)
        else:
            logger.info('Moved %d objects under logs/%s to archived-logs/%s',
                        count, policy_name, policy_name)

    def _s3_move_file(self, obj_summary, dest_key):
        """
        S3 has no built-in operation for "moving" or "renaming" an object.
        The way ``awscli`` and the SDK examples accomplish this is to copy
        the source to the destination, then delete the source. To make this
        slightly more unwieldy, the copy is far easier to do with the
        low-level boto3 client than with the Resource-oriented API.

        :param obj_summary: the S3 ObjectSummary instance to move
        :type obj_summary: ``boto3.S3.ObjectSummary``
        :param dest_key: S3 key to move to
        :type dest_key: str
        """
        if self._dryrun:
            logger.debug('DRYRUN: Would move %s to %s',
                         obj_summary.key, dest_key)
            return
        # ELSE not a dry run, actually do it
        client = self._bucket.meta.client
        logger.debug('Copying %s to %s', obj_summary.key, dest_key)
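        # NOTE: copy_object performs the copy in a single request, which S3
        # limits to source objects of at most 5 GB; larger objects would
        # need a multipart copy (e.g. boto3's managed ``client.copy()``).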
        client.copy_object(
            ACL='private',
            Bucket=self._bucket.name,
            Key=dest_key,
            CopySource={
                'Bucket': self._bucket.name,
                'Key': obj_summary.key
            },
            MetadataDirective='COPY',
            TaggingDirective='COPY'
        )
        # ok, copied, now delete
        logger.debug('Deleting %s', obj_summary.key)
        obj_summary.delete()

    def _get_s3_policy_prefixes(self):
        """
        Find all of the per-policy prefixes (a.k.a. "directories") in the S3
        bucket and return a list of them.

        :return: list of per-policy prefixes in S3 bucket
        :rtype: list
        """
        client = self._bucket.meta.client
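        # NOTE: list_objects returns at most 1000 results per call; rather
        # than paginating, fail loudly below if the listing was truncated.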
        response = client.list_objects(
            Bucket=self._bucket.name,
            Delimiter='/',
            Prefix='logs/'
        )
        if response['IsTruncated']:
            raise RuntimeError('ERROR: S3 response was truncated!')
        result = []
        for pname in response.get('CommonPrefixes', []):
            result.append(pname['Prefix'].replace('logs/', '').strip('/'))
        return result

    def _get_policy_names(self):
        """
        Read the custodian config file; return a list of policy names.

        :return: list of policy names
        :rtype: list
        """
        with open(self._conf_file, 'r') as fh:
            contents = fh.read()
        data = yaml.load(contents, Loader=SafeLoader)
        return [p['name'] for p in data['policies']]


def parse_args(argv):
    p = argparse.ArgumentParser(
        description='Archive S3 logs for deleted policies'
    )
    p.add_argument('-V', '--version', action='version', version=VERSION)
    p.add_argument('-v', '--verbose', dest='verbose', action='count',
                   default=0,
                   help='verbose output; specify twice for debug-level '
                        'output')
    p.add_argument('-d', '--dry-run', dest='dryrun', action='store_true',
                   default=False,
                   help="print what would be done; don't move anything")
    p.add_argument('REGION_NAME', action='store', type=str,
                   help='AWS region name to run against')
    p.add_argument('BUCKET_NAME', action='store', type=str,
                   help='S3 bucket name')
    p.add_argument('CONF_FILE', action='store', type=str,
                   help='path to cloud-custodian config YML file')
    args = p.parse_args(argv)
    return args


def main():
    # setup logging for direct command-line use
    global logger
    FORMAT = "[%(asctime)s %(levelname)s] %(message)s"
    logging.basicConfig(level=logging.INFO, format=FORMAT)
    logger = logging.getLogger()
    # suppress boto3 internal logging below WARNING level
    boto3_log = logging.getLogger("boto3")
    boto3_log.setLevel(logging.WARNING)
    boto3_log.propagate = True
    # suppress botocore internal logging below WARNING level
    botocore_log = logging.getLogger("botocore")
    botocore_log.setLevel(logging.WARNING)
    botocore_log.propagate = True
    # end setup logging
    args = parse_args(sys.argv[1:])
    # set logging level
    if args.verbose > 1:
        set_log_debug(logger)
    elif args.verbose == 1:
        set_log_info(logger)
    S3Archiver(
        args.REGION_NAME, args.BUCKET_NAME, args.CONF_FILE, dryrun=args.dryrun
    ).run()


if __name__ == "__main__":
    main()