在 Azure Synapse 笔记本中解压缩后尝试重命名文件,但在重命名之前我一直成功(代码在 rename_file 方法的最后一行失败)
from azure.storage.filedatalake import DataLakeServiceClient
import os
import shutil
import io
import zipfile
class FileTransformer:
def __init__(self,sourcePath,sourceName,solutionFolder):
#class variables
self.source_path = sourcePath
self.source_name = sourceName
self.sink_path = f"0_unzipped/{sourcePath}/{sourceName}"
self.solution_folder = solutionFolder
def access_source(self,credentials):
dl_service_client_source = DataLakeServiceClient(f"https://{'lake'}.dfs.core.windows.net", credential=credential)
dl_file_system_client_source = dl_service_client_source.get_file_system_client("raw")
dl_directory_client_source = dl_file_system_client_source.get_directory_client(self.source_path)
dl_source_file_client = dl_directory_client_source.get_file_client(self.source_name)
return dl_source_file_client
def extract_data(self,credentials):
compressed_data = io.BytesIO()
# self.access_sink(credentials)
self.access_source(credentials).download_file().readinto(compressed_data)
mounts = []
for mount in mssparkutils.fs.mounts():
mounts.append(mount.mountPoint)
if "/sink" not in mounts:
mssparkutils.fs.mount(
f"abfss://{'refined'}@{'lake'}.dfs.core.windows.net/{self.solution_folder}",
"/sink",
{"linkedService": "ls_lake"}
)
extraction_path = f"/synfs/{mssparkutils.env.getJobId()}/sink/{self.sink_path}"
with zipfile.ZipFile(compressed_data) as zip_file:
zip_file.extractall(extraction_path)
dl_service_client_source = DataLakeServiceClient(f"https://{'lake'}.dfs.core.windows.net", credential=credential)
dl_file_system_client_source = dl_service_client_source.get_file_system_client("refined")
dl_directory_client_source = dl_file_system_client_source.get_file_client(self.solution_folder)
files1 = (self.solution_folder + '/' + self.sink_path)
#.replace(" ", "\\ ")
print(files1)
# files = dl_file_system_client_source.get_paths(path=files1)
files = dl_file_system_client_source.get_paths(path=files1)
# print(self.solution_folder)
# print(self.source_path)
# print(self.source_name)
# print(files)
# print(f"files: {files}")
for file in files:
if not file.is_directory:
file_client = dl_file_system_client_source.get_file_client(file.name)
# file_client = dl_directory_client_source.get_file_client(file_system=dl_file_system_client_source,file_path=file.name)
# file_client = dl_directory_client_source.get_file_client(file.name)
dir_path, file_name1 = os.path.split(file.name)
file_name, file_extension = os.path.splitext(file.name)
print(f"file_name: {file_name}")
print(f"files.name: {file.name}")
# print(f"file.name: {file.name}")
# print(f"file_name: {file_name}")
# print(f"file_extension: {file_extension}")
new_name = f'test{file_extension}'
# print(f"new_name: {new_name}")
new_file_path = os.path.join(dir_path, new_name)
print(f"new_file_path: {new_file_path}")
# print(f"solution_folder: {self.solution_folder}")
file_client.rename_file(new_file_path)
######
client_id = get_secret(secret_loc="ls_keyvault", secret_name="client-id")
client_secret = get_secret(secret_loc="ls_keyvault", secret_name="app-secret")
credential = ClientSecretCredential(
tenant_id="test.onmicrosoft.com",
client_id=client_id,
client_secret=client_secret
)
file = FileTransformer("Path3/Path4/Path5/Path6","FolderName","Path1/Path2")
file.extract_data(credential)
一切都在正确打印:new_file_path、new_name、extension 等,但在类的最后一行代码中仍然收到错误 “file_client.rename_file(new_file_path)”
完整引用:
HttpResponseError Traceback (most recent call last)
/tmp/ipykernel_20528/2509103003.py in <module>
107
108 file = FileTransformerFileTransformer("Path3/Path4/Path5/Path6","FolderName","Path1/Path2")
--> 109 file.extract_data(credential)
/tmp/ipykernel_20528/2509103003.py in extract_data(self, credentials)
78 print(f"new_file_path: {new_file_path}")
79 # print(f"solution_folder: {self.solution_folder}")
---> 80 file_client.rename_file(new_file_path)
81
82
~/cluster-env/clonedenv/lib/python3.8/site-packages/azure/storage/filedatalake/_data_lake_file_client.py in rename_file(self, new_name, **kwargs)
840 _location_mode=self._location_mode
841 )
--> 842 new_file_client._rename_path( # pylint: disable=protected-access
843 '/{}/{}{}'.format(quote(unquote(self.file_system_name)),
844 quote(unquote(self.path_name)),
~/cluster-env/clonedenv/lib/python3.8/site-packages/azure/storage/filedatalake/_path_client.py in _rename_path(self, rename_source, **kwargs)
860 return self._client.path.create(**options)
861 except HttpResponseError as error:
--> 862 process_storage_error(error)
863
864 def _get_path_properties(self, **kwargs):
~/cluster-env/clonedenv/lib/python3.8/site-packages/azure/storage/filedatalake/_deserialize.py in process_storage_error(storage_error)
206 try:
207 # `from None` prevents us from double printing the exception (suppresses generated layer error context)
--> 208 exec("raise error from None") # pylint: disable=exec-used # nosec
209 except SyntaxError:
210 raise error
~/cluster-env/clonedenv/lib/python3.8/site-packages/azure/storage/filedatalake/_deserialize.py in <module>
HttpResponseError: (OutOfRangeInput) The specified resource name length is not within the permissible limits.
RequestId:6ef9e79f-a01f-0067-5cb0-62b105000000
Time:2023-03-30T02:34:49.8798540Z
Code: OutOfRangeInput
Message: The specified resource name length is not within the permissible limits.
RequestId:6ef9e79f-a01f-0067-5cb0-62b105000000
Time:2023-03-30T02:34:49.8798540Z