从 tar.gz 存档中提取特定文件夹

问题描述 投票:0回答:2

请问如何从存档中只提取特定的文件夹(和文件),同时去掉存档内的根级目录?例如,存档的结构如下:

<ROOT_FOLDER>/dir1/file1.txt
<ROOT_FOLDER>/dir1/file2.txt
<ROOT_FOLDER>/dir1/dir2/file3.txt

我编写了一个函数,但尽管使用了 endswith() 进行过滤,它仍然会给出存档中的所有文件。

# Example: package_basename is the archive's top-level folder, i.e. <ROOT_FOLDER>.

# Path of the .tar.gz archive that is opened for reading below.
tar_archive = os.path.join(directory_path, package_name)
# Directory that is passed to TarFile.extract() as the extraction target.
destination = os.path.join(directory_temp, package_renamed)

def get_archive_content(tar_archive, package_basename):
    """Return the archive's member names with the root folder replaced by ``./``.

    Args:
        tar_archive: Path of a gzip-compressed tar archive.
        package_basename: Name of the top-level folder inside the archive.

    Returns:
        A list of member names in archive order. A name that starts with
        ``package_basename + '/'`` has that leading part replaced by ``./``;
        every other name (including the root folder entry itself) is
        returned unchanged.
    """
    root_prefix = package_basename + '/'

    with tarfile.open(tar_archive, 'r:gz') as tar_file:
        names = tar_file.getnames()

    # Only a *leading* root folder is rewritten. A plain str.replace() would
    # also mangle later occurrences ("root/a/root/b") and names that merely
    # contain the text ("myroot/x").
    return [
        './' + name[len(root_prefix):] if name.startswith(root_prefix) else name
        for name in names
    ]

# Rewritten member names of the archive; extract_required_files() reads this
# module-level list.
members = get_archive_content(tar_archive, package_basename)

def extract_required_files() -> None:
    """Extract the selected archive members into ``destination``.

    Reads the module-level names ``tar_archive``, ``members``,
    ``destination``, ``folder_libs``, ``libs``, ``config_files``,
    ``config_default_files``, ``script_files`` and ``start_files``.

    A member is selected when its rewritten name (an entry of ``members``)
    ends with a ``folder_libs`` path followed by a ``libs`` file name, ends
    with an entry of ``config_files``, ``script_files`` or ``start_files``,
    or contains an entry of ``config_default_files``.

    Selected members are written below ``destination`` under their rewritten
    name, and that name is echoed to stdout.
    """
    # One suffix tuple replaces the four duplicated endswith() branches;
    # str.endswith() accepts a tuple of alternatives.
    wanted_suffixes = (
        tuple(path + filename for path in folder_libs for filename in libs)
        + tuple(config_files)
        + tuple(script_files)
        + tuple(start_files)
    )
    # NOTE(review): the original pattern rf"{filename}*.*" was unescaped and
    # its "*" made the last character of the name optional. A literal
    # substring test is assumed to be the intent - confirm.
    default_names = tuple(config_default_files)

    with tarfile.open(tar_archive, 'r:gz') as tar_file:
        # The entries of `members` are rewritten names that need not exist in
        # the archive, so extracting by name fails. Pair each entry with its
        # TarInfo by position instead.
        # Assumes `members` was built from this same archive, in archive order.
        for member, tar_info in zip(members, tar_file.getmembers()):
            selected = member.endswith(wanted_suffixes) or any(
                filename in member for filename in default_names
            )
            if not selected:
                continue

            # extract() writes to <destination>/<TarInfo.name>, so renaming
            # the member is what removes the root folder from the output path.
            tar_info.name = member
            tar_file.extract(tar_info, destination)

            # Report extracted members only; writing every member made the
            # output look as if nothing had been filtered.
            sys.stdout.write(member + '\n')
            
# Run the extraction as soon as the script is executed.
extract_required_files()

我尝试更改为使用 re.search 而不是 .endswith(),但这没有帮助。

python tar archive
2个回答
1
投票

提取文件/文件夹时,您需要使用存档中的完整成员名称,而不是过滤后的名称:

package_basename = 'ROOT_FOLDER'
folder_destination = r'path/to/folder/destination'
root_prefix = f'{package_basename}/'
with tarfile.open(tar_archive, 'r:gz') as tar_file:
    for tar_info in tar_file.getmembers():
        full_name = tar_info.name
        # Strip only a leading root folder; str.replace() would also rewrite
        # later occurrences of the same text inside the path.
        if full_name.startswith(root_prefix):
            name = full_name[len(root_prefix):]
        else:
            name = full_name

        # get full folder (the folder itself or anything below it, so a
        # sibling such as "dir10" is not picked up by accident)
        wanted = name == 'dir1' or name.startswith('dir1/')
        # get a specific file
        wanted = wanted or name.endswith('file3.txt')
        if not wanted:
            continue

        # extract() treats its second argument as the target *directory* and
        # appends the member's name to it. Renaming the member and extracting
        # into folder_destination therefore yields
        # <folder_destination>/dir1/..., without the root folder and without
        # a nested copy of the path.
        tar_info.name = name
        tar_file.extract(tar_info, folder_destination)

0
投票

这有效:

#!/usr/bin/env python3

import tarfile
import os

# Directories to extract, written as the paths stored inside the archive.
dir_list = ["openssh-9.6p1/contrib/suse",  "openssh-9.6p1/contrib/aix"]
# Base directory used to build the extraction paths in filter_tar().
destination = "/tmp/extracted/"
# Path of the gzip-compressed tar archive to read.
tar_fn = "openssh-9.6p1.tar.gz"

def filter_tar(tar_fn, dir_list, extract_to=None):
    """Extract every archive member that lies under one of ``dir_list``.

    Args:
        tar_fn: Path of a gzip-compressed tar archive.
        dir_list: Directory paths as stored in the archive. A member matches
            when it is one of these directories or is located below one.
        extract_to: Base directory to extract into. Defaults to the
            module-level ``destination``.

    Matching members keep their full archive path below the base directory.
    Progress is printed to stdout.
    """
    base_dir = destination if extract_to is None else extract_to

    with tarfile.open(tar_fn, 'r:gz') as fh:
        # Scan the whole archive; a slice such as members[:200] silently
        # skips everything after the first 200 entries.
        for member in fh.getmembers():
            for wanted_dir in dir_list:
                print(f"Checking {member.name} against {wanted_dir}")
                root = wanted_dir.rstrip('/')
                # Match on a path boundary so that ".../suse" does not also
                # select ".../suse-old".
                if member.name != root and not member.name.startswith(root + '/'):
                    continue

                print(f"Matched {member.name} against {wanted_dir}")
                out_dir = os.path.join(base_dir, os.path.dirname(member.name))
                print(f"Extracting {member.name} to {out_dir}")
                # extract() appends the member's own path to `path`, so the
                # base directory is passed here; passing out_dir would
                # duplicate the directory part on disk.
                fh.extract(member, path=base_dir)
                # One extraction per member is enough, even when several
                # entries of dir_list match it.
                break


# Run the extraction with the module-level archive path and directory list.
filter_tar(tar_fn, dir_list)
© www.soinside.com 2019 - 2024. All rights reserved.