我必须使用 Python Boto API 将文件从一个存储桶移动到另一个存储桶。 (我需要它从第一个存储桶中“剪切”文件并将其“粘贴”到第二个存储桶中)。 最好的方法是什么?
**注意:** 如果我有两个不同的访问密钥和秘密密钥,这有关系吗?
如果您使用 boto3(较新的 boto 版本),这非常简单
import boto3

# Copy s3://mybucket/mykey to s3://otherbucket/otherkey.
# copy() is a managed transfer: it performs a multipart copy in
# multiple threads when the object is large enough.
s3 = boto3.resource('s3')
source = {'Bucket': 'mybucket', 'Key': 'mykey'}
s3.meta.client.copy(source, 'otherbucket', 'otherkey')
(文档)
我认为 boto S3 文档回答了你的问题。
https://github.com/boto/boto/blob/develop/docs/source/s3_tut.rst
通过 boto 将文件从一个存储桶移动到另一个存储桶实际上是将密钥从源复制到目标,然后从源中删除密钥。
您可以访问存储桶:
import boto

# Open an S3 connection (boto 2) and grab handles on both buckets.
conn = boto.connect_s3()
src = conn.get_bucket('my_source_bucket')
dst = conn.get_bucket('my_destination_bucket')
并迭代键:
for k in src.list():
    # copy_key(dest_key_name, src_bucket_name, src_key_name) takes plain
    # strings. src.list() yields Key objects whose name is k.key (a str),
    # so the original k.key.name raised AttributeError (str has no .name).
    dst.copy_key(k.key, src.name, k.key)
    # then delete the source key to complete the "move"
    k.delete()
awscli 对我来说完成这项工作比 boto 处理和删除每个键快 30 倍。可能是由于 awscli 中的多线程。如果您仍然想从 python 脚本运行它而不从中调用 shell 命令,您可以尝试这样的操作:
安装 awscli python 包:
sudo pip install awscli
然后就这么简单:
import os

# awscli fails to start when LC_CTYPE is the bare value "UTF-8";
# remap it to a full locale name before importing the CLI driver.
if os.environ.get('LC_CTYPE', '') == 'UTF-8':
    os.environ['LC_CTYPE'] = 'en_US.UTF-8'

from awscli.clidriver import create_clidriver

# Run the equivalent of `aws s3 mv source_bucket target_bucket --recursive`
# in-process, without shelling out.
cli = create_clidriver()
cli.main('s3 mv source_bucket target_bucket --recursive'.split())
如果您有 2 个具有不同访问凭据的不同存储桶。将凭证相应地存储在 ~/.aws 文件夹下的凭证和配置文件中。
您可以使用以下命令从一个具有不同凭据的存储桶复制对象,然后将该对象保存到具有不同凭据的另一个存储桶中:
import boto3
# Copy an object between buckets owned by different accounts using two
# credential profiles from ~/.aws (angle-bracket tokens are placeholders
# to fill in). Neither bucket needs to grant the other access, because
# the object bytes are downloaded with one session and re-uploaded with
# the other.
session_src = boto3.session.Session(profile_name=<source_profile_name>)
source_s3_r = session_src.resource('s3')
session_dest = boto3.session.Session(profile_name=<dest_profile_name>)
dest_s3_r = session_dest.resource('s3')
# create a reference to source image
old_obj = source_s3_r.Object(<source_s3_bucket_name>, <prefix_path> + <key_name>)
# create a reference for destination image
new_obj = dest_s3_r.Object(<dest_s3_bucket_name>, old_obj.key)
# upload the image to destination S3 object
# NOTE(review): .read() buffers the whole object in memory — fine for
# small files, a concern for large ones.
new_obj.put(Body=old_obj.get()['Body'].read())
两个存储桶不需要在 ACL 或存储桶策略中相互访问。
如果你愿意
创建已存储在 Amazon S3 中的对象的副本。
那么 copy_object 就是 boto3 中的方法。
我是如何做到的:
import boto3

aws_access_key_id = ""
aws_secret_access_key = ""
bucket_from = ""
bucket_to = ""

s3 = boto3.resource(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key
)
src = s3.Bucket(bucket_from)


def move_files():
    """Copy every object from bucket_from into bucket_to with public-read ACL.

    Despite the name, nothing is deleted from the source bucket.
    """
    for obj in src.objects.all():
        # filters on obj.key might be applied here
        s3.meta.client.copy_object(
            ACL='public-read',
            Bucket=bucket_to,
            CopySource={'Bucket': bucket_from, 'Key': obj.key},
            Key=obj.key
        )


move_files()
在 boto3 中可以通过以下方式轻松完成不同或相同存储桶之间的复制:
import boto3

# Copy between (same or different) buckets with the Bucket.copy helper.
# This is a managed transfer that will perform a multipart copy in
# multiple threads if necessary.
s3 = boto3.resource('s3')
source = {'Bucket': 'mybucket', 'Key': 'mykey'}
s3.Bucket('otherbucket').copy(source, 'otherkey')
传给 copy_key 的键名必须是字符串(k.key),而不是再取 .name 属性。以下更改对我有用
# copy_key expects plain strings: (dest key name, source bucket name,
# source key name) — entry.key is that name string.
for entry in src.list():
    dst.copy_key(entry.key, src.name, entry.key)
将对象从一个目录移动到另一个目录:
import boto3


def move_s3_object(bucket: str, old_key: str, new_key: str) -> None:
    """Move an object within *bucket*: copy old_key to new_key, then delete old_key."""
    source = f'{bucket}/{old_key}'
    boto3.resource('s3').Object(bucket, new_key).copy_from(CopySource=source)
    boto3.client('s3').delete_object(Bucket=bucket, Key=old_key)


# example:
move_s3_object('my_bucket', old_key='tmp/test.txt', new_key='tmp/tmp2/test.txt')
这甚至可能适用于两个不同的桶,但我还没有测试过。
希望这个答案能有所帮助,谢谢@agrawalramakant。
import boto3


# object_key = 'posts/0173c352-f9f8-4bf1-a818-c99b4c9b0c18.jpg'
def move_from_s3_to_s3(object_key):
    """Copy object_key from source_bucket_name to dest_bucket_name, using a
    separate credential set for each side (fill in the empty key strings)."""
    src_session = boto3.session.Session(aws_access_key_id="",
                                        region_name="ap-south-1",
                                        aws_secret_access_key="")
    dst_session = boto3.session.Session(aws_access_key_id="",
                                        region_name="ap-south-1",
                                        aws_secret_access_key="")
    # reference to the source object
    src_obj = src_session.resource('s3').Object('source_bucket_name', object_key)
    # reference for the destination object
    dst_obj = dst_session.resource('s3').Object('dest_bucket_name', object_key)
    # download the bytes with the source credentials, upload with the
    # destination credentials (whole body is buffered in memory)
    dst_obj.put(Body=src_obj.get()['Body'].read())
我这样做是为了在 2 个 S3 位置之间移动文件。
它处理的场景是:在同一个存储桶内,按给定的文件名前缀批量移动文件到对应的目标文件夹:
import boto3

s3 = boto3.resource('s3')
vBucketName = 'xyz-data-store'

# Source and target bucket handles (same bucket here; only paths differ).
vTargetBkt = s3.Bucket('xyz-data-store')
vSourceBkt = s3.Bucket('xyz-data-store')

# File-name prefixes to pick up, paired one-to-one with the folder
# prefixes the matching files should be copied under.
vSourcePath = ['abc/1/test1_', 'abc/1/test2_',
               'abc/1/test3_', 'abc/1/test4_']
vTargetPath = ['abc/1/test1_', 'abc/1/test2_',
               'abc/1/test3_', 'abc/1/test4_']

for sP, tP in zip(vSourcePath, vTargetPath):
    for obj in vSourceBkt.objects.filter(Prefix=sP, Delimiter='/'):
        SourceFileName = obj.key.split('/')[-1]
        TargetFileName = tP + SourceFileName
        print('TargetFileName ' + TargetFileName)
        # Managed copy into the target path. Deleting the source files is
        # intentionally left out until the copy has been verified.
        s3.meta.client.copy({'Bucket': vSourceBkt.name, 'Key': obj.key},
                            vBucketName, TargetFileName)
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"s3:*"
],
"Resource": [
"arn:aws:s3:::SOURCE_BUCKET_NAME",
"arn:aws:s3:::SOURCE_BUCKET_NAME/*"
]
},
{
"Effect": "Allow",
"Action": [
"s3:*"
],
"Resource": [
"arn:aws:s3:::DESTINATION_BUCKET_NAME",
"arn:aws:s3:::DESTINATION_BUCKET_NAME/*"
]
}
]
}
# Copy every object from the source bucket to the destination bucket
# (same credentials for both; angle-bracket tokens are placeholders).
boto3_session = boto3.Session(aws_access_key_id=<your access key>,
                              aws_secret_access_key=<your secret_access_key>)
s3_resource = boto3_session.resource('s3')
bucket = s3_resource.Bucket("<source bucket name>")
# Iterate the full listing and issue a managed copy per object,
# preserving each key unchanged in the destination bucket.
for obj in bucket.objects.all():
    obj_path = str(obj.key)
    copy_source = {
        'Bucket': "<source bucket name>",
        'Key': obj_path
    }
    s3_resource.meta.client.copy(copy_source, "<destination bucket name>", obj_path)
使用不同的帐户可能具有挑战性。您需要创建两个不同的会话,两个会话同时访问两个存储桶。 如果您遇到这种情况,您可以执行以下操作:
# create session for source account
sessionSource = boto3.Session(
    aws_access_key_id='ACCESS_KEY_SOURCE',
    aws_secret_access_key='SECRET_KEY_SOURCE',
    region_name='REGION'
)
# create session for target account
sessionTarget = boto3.Session(
    aws_access_key_id='ACCESS_KEY_TARGET',
    aws_secret_access_key='SECRET_KEY_TARGET',
    region_name='REGION'
)
# BUG FIX: the transfer code that follows uses source_client and
# target_client, which were never created in the original — derive an
# S3 client from each session here so those names exist.
source_client = sessionSource.client('s3')
target_client = sessionTarget.client('s3')
然后您可以通过将文件添加到新存储桶来移动文件,然后从源帐户中删除它
# Download with the source account, re-upload with the target account,
# then delete the original — a cross-account "move".
payload = source_client.get_object(
    Bucket="source-bucket-name",
    Key='file-key'
)
target_client.upload_fileobj(
    payload['Body'],
    'target-bucket-name',
    'file-key',
)
source_client.delete_object(
    Bucket="source-bucket-name",
    Key='file-key'
)
这是我用来在 s3 存储桶的子目录中移动文件的代码
# =============================================================================
# CODE TO MOVE FILES within subfolders in S3 BUCKET
# =============================================================================
from boto3.session import Session

ACCESS_KEY = 'a_key'
SECRET_KEY = 's_key'

session = Session(aws_access_key_id=ACCESS_KEY,
                  aws_secret_access_key=SECRET_KEY)
s3 = session.resource('s3')      # resource API: per-object copy/delete
s3client = session.client('s3')  # client API: listing

# list_objects returns at most 1000 keys per call, so drain the prefix in
# batches: process a listing, then re-list until nothing remains.
resp_dw = s3client.list_objects(Bucket='main_bucket', Prefix='sub_folder/', Delimiter="/")
forms2_dw = [x['Key'] for x in resp_dw['Contents'][1:]]

reload_no = 0
while forms2_dw:
    total_files = len(forms2_dw)
    for i in range(total_files):
        # Destination folder derived from the object's LastModified date
        # (YYYYMMDD) — adjust this line for a different folder scheme.
        foldername = resp_dw['Contents'][1:][i]['LastModified'].strftime('%Y%m%d')
        my_bcket = 'main_bucket'
        my_file_old = resp_dw['Contents'][1:][i]['Key']  # source key path
        zip_filename = my_file_old.split('/')[-1]
        my_file_new = 'new_sub_folder/' + foldername + "/" + zip_filename  # destination key
        # BUG FIX: the original printed the undefined name
        # s3_archive_subpath_nw here, raising NameError on first iteration.
        # print() form works on both Python 2 and 3.
        print(str(reload_no) + '::: copying from====:' + my_file_old + ' to :=====' + my_file_new)
        # Only .zip files are moved; everything else is left in place.
        if zip_filename[-4:] == '.zip':
            s3.Object(my_bcket, my_file_new).copy_from(CopySource=my_bcket + '/' + my_file_old)
            s3.Object(my_bcket, my_file_old).delete()
        print(str(i) + ' files moved of ' + str(total_files))
    # BUG FIX: the original re-listed with Prefix='sub-folder/' (hyphen),
    # a different prefix than the one being drained, so the loop never
    # observed its own progress. Use the same 'sub_folder/' prefix.
    resp_dw = s3client.list_objects(Bucket='main_bucket', Prefix='sub_folder/', Delimiter="/")
    forms2_dw = [x['Key'] for x in resp_dw['Contents'][1:]]
    reload_no += 1
使用 s3fs 库可以轻松完成。
import s3fs

src_bucket = 'source_bucket'
dst_bucket = 'destination_bucket'

# Authenticated s3fs filesystem handle.
fs = s3fs.S3FileSystem(anon=False, key='aws_s3_key', secret='aws_s3_secret_key')

for path in fs.ls(src_bucket, refresh=True):  # current listing of the source
    if 'file_name' in path:                   # name filter
        fs.mv(path, dst_bucket)               # move (copy + delete) to destination