我正在尝试在内存中创建 tar 流,向其中添加文件,然后将其保存到 S3。但存在一个问题:tar 中的文件大小为零。有人能给些建议吗?代码如下:
def tar_and_upload(bucket, keys, dest_bucket):
    """Bundle S3 objects into an in-memory ``.tar.gz`` and upload it to S3.

    Each object named in ``keys`` is downloaded from ``bucket``, fully read
    into memory, and stored in the archive under the last ``/``-separated
    component of its key. The finished archive is uploaded to ``dest_bucket``
    under a random ``<uuid4>.tar.gz`` key.

    Args:
        bucket: Name of the bucket the source objects are read from.
        keys: Iterable of object keys to include in the archive.
        dest_bucket: Name of the bucket the archive is written to.

    Returns:
        None. An upload failure is logged rather than raised.
    """
    s3 = boto3.client('s3')
    buffer = io.BytesIO()

    # The context manager closes the archive (flushing the gzip trailer into
    # ``buffer``) even if a download fails part-way through.
    with tarfile.open(mode="w:gz", fileobj=buffer) as archive:
        for key in keys:
            payload = s3.get_object(Bucket=bucket, Key=key)["Body"].read()
            member = tarfile.TarInfo(key.split("/")[-1])
            # addfile() copies exactly ``member.size`` bytes from a file
            # object. A fresh TarInfo has size 0, so without this line every
            # member is written empty.
            member.size = len(payload)
            archive.addfile(member, io.BytesIO(payload))

    obj_name = "{}.tar.gz".format(uuid.uuid4())
    try:
        s3.put_object(Body=buffer.getvalue(), Bucket=dest_bucket, Key=obj_name)
    except Exception:
        # Best-effort upload: record the failure with its traceback and
        # return normally, as the original did.
        logging.error("Can't save tar to S3", exc_info=True)
显然,将字节流添加到 tar 时,需要显式指定大小(设置 TarInfo.size),并且传给 addfile 的必须是文件对象(例如 io.BytesIO),而不是原始字节。示例代码:
import tarfile
import uuid
import io
import os
def tar_and_upload(src_dir="images", dest_dir="temp"):
    """Pack the regular files of ``src_dir`` into an in-memory ``.tar.gz``.

    The archive is built in a ``BytesIO`` buffer and then written to
    ``dest_dir`` under a random ``<uuid4>.tar.gz`` name. Each archived file
    name and the final archive name are printed.

    Args:
        src_dir: Directory whose files are archived. Defaults to ``"images"``.
        dest_dir: Existing directory the archive is written to. Defaults to
            ``"temp"``.

    Returns:
        None. A failure to write the archive is printed rather than raised.
    """
    buffer = io.BytesIO()

    # The context manager closes the archive (flushing the gzip trailer into
    # ``buffer``) even if reading one of the files fails.
    with tarfile.open(mode="w:gz", fileobj=buffer) as archive:
        # Sorted so the member order does not depend on directory order.
        for filename in sorted(os.listdir(src_dir)):
            file_path = os.path.join(src_dir, filename)
            # listdir() also returns sub-directories; open() would fail on
            # them, so only regular files are archived.
            if not os.path.isfile(file_path):
                continue
            print(filename)
            with open(file_path, "rb") as f:
                payload = f.read()
            tar_info = tarfile.TarInfo(filename)
            # addfile() copies exactly ``tar_info.size`` bytes from the file
            # object, and a fresh TarInfo has size 0, so set it explicitly.
            tar_info.size = len(payload)
            archive.addfile(tar_info, io.BytesIO(payload))

    obj_name = "{}.tar.gz".format(uuid.uuid4())
    object_path = os.path.join(dest_dir, obj_name)
    try:
        with open(object_path, "wb") as f:
            f.write(buffer.getvalue())
        print(obj_name)
    except OSError as e:
        # Best-effort write: report the problem and return normally.
        print(str(e))
# Build the archive only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    tar_and_upload()