如何从 tar 存档中只提取特定的文件夹(和文件),并在解压时去掉存档内的根目录这一层?例如存档的结构如下:
<ROOT_FOLDER>/dir1/file1.txt
<ROOT_FOLDER>/dir1/file2.txt
<ROOT_FOLDER>/dir1/dir2/file3.txt
我编写了下面的函数,但尽管使用了 endswith() 过滤,它仍然会输出存档中的所有文件。
# Example: package_basename is the archive's root folder, i.e. <ROOT_FOLDER>.
# Full path of the gzip-compressed tar archive to read.
tar_archive = os.path.join(directory_path, package_name)
# Directory that extracted members are written under.
destination = os.path.join(directory_temp, package_renamed)
def get_archive_content(tar_archive, package_basename):
    """Return the archive's member names with the root folder rewritten to './'.

    Only a leading ``<package_basename>/`` is rewritten. The same text deeper
    inside a path (e.g. ``ROOT/dir/ROOT/file``) is left alone, and names that
    do not start with the root folder are returned unchanged.

    Args:
        tar_archive: Path of the gzip-compressed tar archive to read.
        package_basename: Name of the root folder inside the archive.

    Returns:
        A list of member names, in archive order.
    """
    root_prefix = package_basename + '/'
    with tarfile.open(tar_archive, 'r:gz') as tar_file:
        names = tar_file.getnames()
    return [
        './' + name[len(root_prefix):] if name.startswith(root_prefix) else name
        for name in names
    ]
# Member names of the archive as returned by get_archive_content(), read once when the script runs.
members = get_archive_content(tar_archive, package_basename)
def _is_required(name):
    """Return True when *name* matches any of the configured file selections."""
    return (
        any(name.endswith(path + filename) for path in folder_libs for filename in libs)
        or any(name.endswith(filename) for filename in config_files)
        # NOTE(review): filename is interpolated into the pattern unescaped, so
        # regex metacharacters in it are live; confirm this is intended.
        or any(re.search(rf"{filename}*.*", name) for filename in config_default_files)
        or any(name.endswith(filename) for filename in script_files)
        or any(name.endswith(filename) for filename in start_files)
    )


def extract_required_files() -> None:
    """Extract the archive members whose rewritten names match the selections.

    Reads the module-level ``tar_archive``, ``destination`` and ``members``
    (the rewritten names produced by ``get_archive_content``), plus the
    selection lists used by ``_is_required``. The name of every extracted
    member is written to stdout.

    ``TarFile.extract`` looks a member up by the name stored in the archive,
    so the rewritten './...' names cannot be passed to it; doing so raises
    ``KeyError``. Each rewritten name is therefore paired with the real
    ``TarInfo`` it was derived from and that object is extracted instead.
    """
    with tarfile.open(tar_archive, 'r:gz') as tar_file:
        # Assumes `members` was built from this same archive, so it lines up
        # one-to-one, in order, with getmembers().
        for tar_info, rewritten_name in zip(tar_file.getmembers(), members):
            if not _is_required(rewritten_name):
                continue
            # NOTE(review): this recreates the member's archived path under
            # `destination`, root folder included. Rename the TarInfo before
            # extracting if the root level must be dropped.
            tar_file.extract(tar_info, destination)
            sys.stdout.write(rewritten_name + '\n')
# Run the extraction when the script executes.
extract_required_files()
我也尝试过用 re.search 代替 .endswith(),但没有任何改善。
提取文件/文件夹时,需要使用存档中的完整成员名,而不是过滤(改写)后的名称,因为 tarfile 是按存档中的原始名称查找成员的:
package_basename = 'ROOT_FOLDER'
folder_destination = r'path/to/folder/destination'
# Only a leading "<root>/" is stripped; the same text deeper in a path is kept.
root_prefix = f'{package_basename}/'

with tarfile.open(tar_archive, 'r:gz') as tar_file:
    for member in tar_file.getmembers():
        full_name = member.name
        if full_name.startswith(root_prefix):
            name = full_name[len(root_prefix):]
        else:
            name = full_name
        # get full folder, or get a specific file
        if name.startswith('dir1') or name.endswith('file3.txt'):
            # extract() writes to <path>/<member.name>. Renaming the member to
            # its root-less name and extracting into the destination root is
            # what drops the archive's top-level folder; joining the name onto
            # the path as well would nest the member under its own name.
            member.name = name
            tar_file.extract(member, folder_destination)
下面这种写法对我有效:
#!/usr/bin/env python3
import tarfile
import os
# Archive-internal directories whose members should be extracted.
dir_list = ["openssh-9.6p1/contrib/suse", "openssh-9.6p1/contrib/aix"]
# Root directory that extracted members are written under.
destination = "/tmp/extracted/"
# Path of the gzip-compressed tar archive to read.
tar_fn = "openssh-9.6p1.tar.gz"
def filter_tar(tar_fn, dir_list, dest_root=None):
    """Extract only the members of a .tar.gz that live under given directories.

    Every member of the archive is checked (there is no cap on the number of
    members). A member is extracted at most once, even when several entries
    of *dir_list* select it. Progress is reported with ``print``.

    Args:
        tar_fn: Path of the gzip-compressed tar archive to read.
        dir_list: Archive-internal directory paths to extract. An entry
            selects the directory itself and everything below it; a trailing
            '/' is ignored. An empty entry selects every member.
        dest_root: Directory to extract into. When omitted, the module-level
            ``destination`` is used.
    """
    root = destination if dest_root is None else dest_root
    with tarfile.open(tar_fn, 'r:gz') as fh:
        for member in fh.getmembers():
            for wanted in dir_list:
                print(f"Checking {member.name} against {wanted}")
                prefix = wanted.rstrip('/')
                # Match on a path boundary so "contrib/suse" does not also
                # select a sibling such as "contrib/suse2".
                if prefix and member.name != prefix and not member.name.startswith(prefix + '/'):
                    continue
                print(f"Matched {member.name} against {wanted}")
                out_dir = os.path.join(root, os.path.dirname(member.name))
                print(f"Extracting {member.name} to {out_dir}")
                # extract() appends the member's own path to `path`, so the
                # root is passed here; passing out_dir would repeat the
                # member's directory inside itself.
                fh.extract(member, path=root)
                break
# Extract the selected directories from the archive when the script runs.
filter_tar(tar_fn, dir_list)