Creating a tar stream in memory from multiple file byte streams

Problem description

I'm trying to create a tar stream in memory, add files to it, and then save it to S3. But something is wrong: the files inside the tar all have zero size. Can anyone advise? Code snippet below:

import io
import logging
import tarfile
import uuid

import boto3

def tar_and_upload(bucket, keys, dest_bucket):
    s3 = boto3.client('s3')
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for key in keys:
        obj = s3.get_object(Bucket=bucket, Key=key)
        _bytes = obj["Body"].read()
        _file_name = key.split("/")[-1]
        tar_file_obj.addfile(tarfile.TarInfo(_file_name), _bytes)
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception as e:
        logging.error("Can't save tar to S3", exc_info=True)
        return
Tags: python, amazon-s3, tar, tarfile
1 Answer

It turns out that when adding a byte stream to a tar archive, you have to set the member's size explicitly on the TarInfo; otherwise addfile reads zero bytes from the stream. Sample code:

import tarfile
import uuid
import io
import os

def tar_and_upload():
    file_obj = io.BytesIO()
    tar_file_obj = tarfile.open(mode="w:gz", fileobj=file_obj)
    for filename in os.listdir("images"):
        print(filename)
        file_path = os.path.join("images", filename)
        # tar_file_obj.add(file_path)  # works too, but only for files on disk
        with open(file_path, "rb") as f:
            _bytes = f.read()
            tar_info = tarfile.TarInfo(filename)
            tar_info.size = len(_bytes)  # without this, addfile writes a zero-byte member
            tar_file_obj.addfile(tar_info, io.BytesIO(_bytes))
    tar_file_obj.close()
    try:
        obj_name = "{}.tar.gz".format(str(uuid.uuid4()))
        object_path = os.path.join("temp", obj_name)
        with open(object_path, "wb") as f:
            f.write(file_obj.getvalue())
        print(obj_name)
    except Exception as e:
        print(str(e))

if __name__ == "__main__":
    tar_and_upload()
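
For completeness, here is a minimal sketch that applies the same fix back to the S3 workflow from the question. It assumes boto3 credentials are already configured; bucket, keys, and dest_bucket are placeholders supplied by the caller:

import io
import logging
import tarfile
import uuid

import boto3

def tar_and_upload(bucket, keys, dest_bucket):
    s3 = boto3.client("s3")
    file_obj = io.BytesIO()
    # The with-block guarantees the tar (and its gzip footer) is fully
    # flushed into file_obj before we read file_obj back out.
    with tarfile.open(mode="w:gz", fileobj=file_obj) as tar:
        for key in keys:
            obj = s3.get_object(Bucket=bucket, Key=key)
            _bytes = obj["Body"].read()
            tar_info = tarfile.TarInfo(key.split("/")[-1])
            tar_info.size = len(_bytes)  # the fix: tell addfile how many bytes to read
            tar.addfile(tar_info, io.BytesIO(_bytes))
    try:
        obj_name = "{}.tar.gz".format(uuid.uuid4())
        s3.put_object(Body=file_obj.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        logging.error("Can't save tar to S3", exc_info=True)

Note that closing the tarfile (or leaving the with-block) before calling put_object matters: the gzip footer is only written on close, so uploading earlier would produce a truncated archive.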