返回目录和子目录中的文件总数

问题描述 投票:0回答:7

我想创建一个函数,返回指定目录及其所有子目录中的文件总数。只需要一些入门提示即可。

python recursion count subdirectory pathlib
7个回答
108
投票

单行代码

import os
# Total number of files under the tree: os.walk yields one (dirpath, dirnames,
# filenames) triple per directory, so summing len(filenames) counts every file.
# The path is a raw string so the Windows backslashes are taken literally
# ("\C" and "\P" are not valid escape sequences in a normal string literal).
cpt = sum(len(files) for _root, _dirs, files in os.walk(r"G:\CS\PYTHONPROJECTS"))

31
投票

使用

os.walk
。它会为你做递归。有关示例,请参阅 http://www.pythonforbeginners.com/code-snippets-source-code/python-os-walk/

# Add up the number of file names os.walk reports for every directory it
# visits below `folder` (os.walk performs the recursion itself).
total = sum(len(filenames) for _dirpath, _dirnames, filenames in os.walk(folder))

6
投票

只需添加一个

elif
语句来处理目录:

def fileCount(folder):
    """Count the files in *folder* and in all of its subdirectories.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The number of entries for which ``os.path.isfile`` is true, summed
        over ``folder`` and every directory nested below it.

    Raises:
        OSError: If ``folder`` or a nested directory cannot be listed.
    """
    count = 0

    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)

        if os.path.isfile(path):
            count += 1
        # os.path has no ``isfolder`` function; ``isdir`` is the directory test.
        elif os.path.isdir(path):
            count += fileCount(path)

    return count

2
投票
  • 这里有一些使用 pathlib 的单行代码,它是标准库的一部分。
  • 使用
    Path.cwd().rglob('*')
    或
    Path('some path').rglob('*')
    创建一个递归遍历该目录下所有条目(文件和子目录)的生成器。
    • 使用
      list
      或
      *
      解包生成器,然后使用
      len
      获取条目数量(如果只想统计文件,请用 is_file() 过滤)。
  • 请参阅如何计算每个子文件夹中的文件总数以获取每个目录的文件总数。
from pathlib import Path

# Count every path that rglob('*') yields below the current working directory.
# NOTE: no is_file() filter is applied here, so directories are counted too.
total_dir_files = len(list(Path.cwd().rglob('*')))

# or, equivalently, unpack the generator into a list display with *
total_dir_files = len([*Path.cwd().rglob('*')])

# or filter for only files using is_file()
file_count = len([f for f in Path.cwd().rglob('*') if f.is_file()])

2
投票

以下是 3 种最流行方法的时间测试:

import os
from datetime import datetime

# Root directory that each of the timed traversals in this script scans.
dir_path = "D:\\Photos"

# os.listdir

def recursive_call(dir_path):
    """Count files and sub-folders below *dir_path* using ``os.listdir``.

    Every name returned by ``os.listdir`` is classified with
    ``os.path.isfile`` / ``os.path.isdir``; directories are descended into
    recursively. Entries that are neither are ignored.

    Returns:
        A ``(files, folders)`` tuple with the totals for the whole tree.
        ``dir_path`` itself is not included in the folder total.
    """
    file_total = 0
    folder_total = 0
    for name in os.listdir(dir_path):
        full_path = os.path.join(dir_path, name)
        if os.path.isfile(full_path):
            file_total += 1
            continue
        if not os.path.isdir(full_path):
            continue
        # A directory: count it, then fold in everything found beneath it.
        nested_files, nested_folders = recursive_call(full_path)
        file_total += nested_files
        folder_total += 1 + nested_folders
    return file_total, folder_total
# Time the os.listdir-based traversal. The clock is stopped before anything is
# printed so that console output is not counted as part of the measurement.
# NOTE(review): this is the first traversal of dir_path in the script, so it
# may pay cold filesystem-cache costs that later runs do not - confirm by
# reordering the measurements.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
elapsed_seconds = (datetime.now() - start_time).total_seconds()
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.listdir): %s seconds" % elapsed_seconds)

# os.walk

# Time the os.walk-based traversal.
# os.walk yields one (dirpath, dirnames, filenames) triple per directory,
# starting with dir_path itself. Folders are counted from `dirnames` so the
# root is not included, matching what the recursive_call variants report
# (counting the yielded triples would be one too many).
start_time = datetime.now()
files = 0
folders = 0
for _root, subdirs, filenames in os.walk(dir_path):
    files += len(filenames)
    folders += len(subdirs)
# Stop the clock before printing so console output is not measured.
elapsed_seconds = (datetime.now() - start_time).total_seconds()
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.walk): %s seconds" % elapsed_seconds)

# os.scandir

def recursive_call(dir_path):
    """Count files and sub-folders below *dir_path* using ``os.scandir``.

    Args:
        dir_path: Directory to scan (a path string or any path-like object).

    Returns:
        A ``(files, folders)`` tuple with the totals for the whole tree.
        ``dir_path`` itself is not included in the folder total.

    Raises:
        OSError: If ``dir_path`` or a nested directory cannot be scanned.
    """
    files = 0
    folders = 0
    # Use the iterator as a context manager so its directory handle is
    # released promptly, even if an error interrupts the loop.
    with os.scandir(dir_path) as entries:
        for entry in entries:
            if entry.is_file():
                files += 1
            elif entry.is_dir():
                folders += 1
                file_count, folder_count = recursive_call(entry.path)
                files += file_count
                folders += folder_count
    return files, folders
# Time the os.scandir-based traversal. The clock is stopped before anything is
# printed so that console output is not counted as part of the measurement.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
elapsed_seconds = (datetime.now() - start_time).total_seconds()
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.scandir): %s seconds" % elapsed_seconds)

结果:

Folders: 53, Files: 29048
Time Taken (os.listdir): 3.074945 seconds

Folders: 53, Files: 29048
Time Taken (os.walk): 0.062022 seconds

Folders: 53, Files: 29048
Time Taken (os.scandir): 0.048984 seconds

结论:

虽然

os.walk
是最优雅的,但
os.scandir
递归实现似乎是最快的。


0
投票

这是我的版本

def fileCount(folder, allowed_extensions=None):
    """Count the files in *folder* and all of its subdirectories.

    Args:
        folder: Directory whose tree is walked with ``os.walk``.
        allowed_extensions: Optional filter. May be a single suffix string or
            any iterable of suffix strings (tuple, list, set, ...). When it is
            ``None`` or empty, every file is counted. Matching is done with
            ``str.endswith`` and is therefore case-sensitive.

    Returns:
        The number of matching files found in the tree.
    """
    # str.endswith only accepts a str or a tuple of str, so normalise the
    # filter once up front instead of failing on lists or sets.
    if not allowed_extensions:
        suffixes = None
    elif isinstance(allowed_extensions, str):
        suffixes = (allowed_extensions,)
    else:
        suffixes = tuple(allowed_extensions)

    count = 0
    for _base, _dirs, files in os.walk(folder):
        if suffixes is None:
            count += len(files)
        else:
            count += sum(1 for name in files if name.endswith(suffixes))
    return count

# Example usage: the directory tree to scan (raw string keeps the backslashes).
scan_dir = r"C:\Users\sannjayy\Desktop"

# Only file names ending in one of these suffixes are counted.
allowed_extensions = (".jpg", ".mp4")

# Print the number of matching files found under scan_dir.
print(fileCount(scan_dir , allowed_extensions))


0
投票

这是我的尝试,在我测试的 Windows 上运行良好

from pprint import pprint # Optional
import pathlib

def count_folder_itens(folder, count=0):
    """Count the items below *folder* and record its sub-folder structure.

    Args:
        folder: Directory to scan; anything ``pathlib.Path`` accepts.
        count: Unused. Kept (and made optional) so that existing
            two-argument calls keep working.

    Returns:
        A ``(structure, file_count)`` tuple. ``structure`` is a list with one
        ``(str(subfolder_path), subfolder_structure)`` pair per directory
        directly inside ``folder``; ``file_count`` is the number of
        non-directory items found in ``folder`` and everything below it.
        If the directory cannot be read, a message is printed and
        ``([], 0)`` is returned.
    """
    path = pathlib.Path(folder)
    structure = []
    file_count = 0
    try:
        for item in path.iterdir():
            if item.is_dir():
                # Item is a folder: recurse, then store the structure of the
                # subfolder and add its item count to the running total.
                subfolder, subfile_count = count_folder_itens(item)
                structure.append((str(item), subfolder))
                file_count += subfile_count
            else:
                # Anything that is not a folder is counted as a file.
                file_count += 1
    except OSError:
        # Only file-system failures (missing path, not a directory, permission
        # denied, ...) are treated as an invalid folder; programming errors
        # are no longer hidden behind this message.
        print(f'Invalid folder path: {path}')
        return ([], 0)
    return (structure, file_count)
 
# Escaped folder path. This is a normal (non-raw) literal, so each "\\" is a
# single backslash; combining the r prefix with doubled backslashes would put
# two backslashes between every path component.
folder_path = 'C:\\Users\\wwwxkz\\Downloads\\'
# Call count_folder_itens starting with 0 itens
result = count_folder_itens(folder_path, 0)

# Number of itens in the folder and its subfolders
print(f'{folder_path}: {result[1]}')
# Print all folder structure recursively
pprint(result[0])  # Optional
© www.soinside.com 2019 - 2024. All rights reserved.