본문 바로가기
카테고리 없음

AWS GLUE S3 폴더 및 파일 백업하기(복사 및 삭제)

by 성곤 2025. 4. 8.
반응형

AWS GLUE S3 폴더 백업하기(복사 및 삭제)

 

AWS S3는 폴더명 변경을 지원하지 않는다.

그래서 폴더를 백업할 때는 폴더 안의 파일을 새 경로로 복사한 뒤 원본을 삭제하는 작업을 해야 한다.

 

import boto3
import logging
from datetime import datetime

# Configure the root logger at INFO level and create this module's logger,
# which the functions below use for progress and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def _split_s3_folder_path(source_path):
    """
    Split an S3 folder path into its bucket name and key prefix.

    Args:
        source_path: Folder location such as 's3://bucket/path/to/folder/'.
            The leading 's3://' scheme and trailing slashes are optional.

    Returns:
        A tuple ``(bucket_name, source_prefix)`` where ``source_prefix`` ends
        with a single '/'.

    Raises:
        ValueError: If the path lacks a bucket name or a folder component
            (a whole bucket cannot be renamed this way).
    """
    scheme = 's3://'
    # Strip the scheme only when it is a real prefix, never from the middle
    # of the path.
    if source_path.startswith(scheme):
        path = source_path[len(scheme):]
    else:
        path = source_path

    bucket_name, _, folder = path.rstrip('/').partition('/')
    if not bucket_name or not folder:
        raise ValueError(
            f"Expected 's3://<bucket>/<folder>/' but got: {source_path!r}"
        )
    return bucket_name, folder + '/'


def rename_s3_folders(source_paths):
    """
    Rename S3 folders by adding '_BAK' suffix

    S3 has no rename operation, so every object under each folder prefix is
    copied to the same relative key under '<folder>_BAK/' and the original
    object is then deleted.

    Args:
        source_paths: Iterable of folder paths such as
            's3://bucket/path/to/folder/'.

    Raises:
        ValueError: If a path has no bucket or no folder component.
        Exception: Any error raised while listing, copying, or deleting is
            logged and re-raised. Processing stops at the first failure, so
            the failing folder may be left partially moved and later folders
            are not processed.
    """
    s3_client = boto3.client('s3')
    # The paginator does not depend on the folder, so build it once.
    paginator = s3_client.get_paginator('list_objects_v2')

    for source_path in source_paths:
        try:
            bucket_name, source_prefix = _split_s3_folder_path(source_path)
            # 'a/b/' -> 'a/b_BAK/'
            target_prefix = source_prefix[:-1] + '_BAK/'

            logger.info(f"Processing folder: {source_path}")

            total_objects = 0

            # List and process objects in batches
            for page in paginator.paginate(Bucket=bucket_name, Prefix=source_prefix):
                if 'Contents' not in page:
                    logger.warning(f"No objects found in {source_path}")
                    continue

                for obj in page['Contents']:
                    old_key = obj['Key']
                    # Every listed key starts with source_prefix, so swap only
                    # that leading part. str.replace() would also rewrite any
                    # later occurrence of the prefix inside the key
                    # (e.g. 'a/a/f' with prefix 'a/').
                    new_key = target_prefix + old_key[len(source_prefix):]

                    # Copy object to new location.
                    # NOTE: per the S3 CopyObject documentation a single
                    # copy_object call handles objects up to 5 GB; larger
                    # objects would need a multipart copy.
                    logger.info(f"Copying {old_key} to {new_key}")
                    s3_client.copy_object(
                        Bucket=bucket_name,
                        CopySource={'Bucket': bucket_name, 'Key': old_key},
                        Key=new_key
                    )

                    # Delete the original only after the copy call returned
                    # without raising.
                    logger.info(f"Deleting {old_key}")
                    s3_client.delete_object(
                        Bucket=bucket_name,
                        Key=old_key
                    )

                    total_objects += 1

            logger.info(f"Successfully processed {total_objects} objects in folder: {source_path}")

        except Exception as e:
            logger.error(f"Error processing folder {source_path}: {str(e)}")
            raise

def main():
    """
    Entry point: back up a fixed list of S3 folders and log the elapsed time.

    Any exception from the rename operation is logged as a job failure and
    re-raised so the caller (e.g. the job runner) sees the failure.
    """
    # Folders that will be moved to '<folder>_BAK/'
    targets = [
        's3://abc-s3/qwer/cust1/',
        's3://abc-s3/qwer/cust2/',
        's3://abc-s3/qwer/cust3/'
    ]

    started_at = datetime.now()
    logger.info(f"Starting folder rename operation at {started_at}")

    try:
        rename_s3_folders(targets)

        finished_at = datetime.now()
        logger.info(f"Operation completed successfully at {finished_at}")
        logger.info(f"Total duration: {finished_at - started_at}")
    except Exception as exc:
        logger.error(f"Job failed: {str(exc)}")
        raise

# Run the backup only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
반응형