반응형
AWS GLUE S3 폴더 백업하기(복사 및 삭제)
AWS S3는 폴더명 변경을 지원하지 않는다.
그래서 폴더를 백업할 때는 파일을 새 경로로 복사한 뒤 원본을 삭제하는 작업을 해야 한다.
import boto3
import logging
from datetime import datetime
# Configure the root logger at INFO level and create the module-level logger
# (named after this module) that the functions below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _split_s3_folder_path(source_path):
    """Split a folder URI like 's3://bucket/a/b/' into ('bucket', 'a/b/')."""
    scheme = 's3://'
    # Strip the scheme only when it is the leading part of the path, so a key
    # that happens to contain the text 's3://' is left intact.
    if source_path.startswith(scheme):
        source_path = source_path[len(scheme):]
    bucket_name, _, folder_key = source_path.rstrip('/').partition('/')
    return bucket_name, folder_key + '/'


def rename_s3_folders(source_paths):
    """
    Rename S3 "folders" by adding a '_BAK' suffix to each one.

    S3 has no rename operation, so every object under the source prefix is
    copied to the matching key under '<prefix>_BAK/' and the original object
    is then deleted.

    Args:
        source_paths: Iterable of folder URIs such as 's3://bucket/dir/sub/'.
            The trailing slash is optional.

    Raises:
        Exception: Any error raised while listing, copying or deleting is
            logged together with the folder being processed and re-raised,
            so folders later in ``source_paths`` are not processed.
    """
    s3_client = boto3.client('s3')
    for source_path in source_paths:
        try:
            bucket_name, source_prefix = _split_s3_folder_path(source_path)
            target_prefix = source_prefix.rstrip('/') + '_BAK/'
            prefix_length = len(source_prefix)
            logger.info("Processing folder: %s", source_path)

            # List and process objects in batches
            paginator = s3_client.get_paginator('list_objects_v2')
            total_objects = 0
            for page in paginator.paginate(Bucket=bucket_name, Prefix=source_prefix):
                if 'Contents' not in page:
                    logger.warning("No objects found in %s", source_path)
                    continue
                for obj in page['Contents']:
                    old_key = obj['Key']
                    # Every listed key starts with source_prefix, so swap only
                    # that leading part; str.replace would also rewrite any
                    # later occurrence of the prefix inside the key.
                    new_key = target_prefix + old_key[prefix_length:]

                    # Copy object to new location.
                    # NOTE(review): a single copy_object call is limited to
                    # 5 GB objects per the S3 documentation; larger objects
                    # would need a multipart copy - confirm data sizes.
                    logger.info("Copying %s to %s", old_key, new_key)
                    s3_client.copy_object(
                        Bucket=bucket_name,
                        CopySource={'Bucket': bucket_name, 'Key': old_key},
                        Key=new_key,
                    )

                    # Delete the original only after the copy call returned.
                    logger.info("Deleting %s", old_key)
                    s3_client.delete_object(
                        Bucket=bucket_name,
                        Key=old_key,
                    )
                    total_objects += 1
            logger.info(
                "Successfully processed %s objects in folder: %s",
                total_objects,
                source_path,
            )
        except Exception as e:
            logger.error("Error processing folder %s: %s", source_path, e)
            raise
def main(folders_to_rename=None):
    """
    Run the folder rename job and log its start time, end time and duration.

    Args:
        folders_to_rename: Optional iterable of 's3://bucket/prefix/' folder
            URIs to rename. When omitted or None, the built-in default list
            below is used.

    Raises:
        Exception: Re-raised after being logged when the rename operation
            fails.
    """
    if folders_to_rename is None:
        # Default list of folders to rename
        folders_to_rename = [
            's3://abc-s3/qwer/cust1/',
            's3://abc-s3/qwer/cust2/',
            's3://abc-s3/qwer/cust3/',
        ]

    start_time = datetime.now()
    logger.info(f"Starting folder rename operation at {start_time}")
    try:
        rename_s3_folders(folders_to_rename)
    except Exception as e:
        logger.error(f"Job failed: {str(e)}")
        raise
    else:
        # Success path: report when the job finished and how long it took.
        end_time = datetime.now()
        duration = end_time - start_time
        logger.info(f"Operation completed successfully at {end_time}")
        logger.info(f"Total duration: {duration}")


if __name__ == "__main__":
    main()
반응형