我想创建一个函数,返回指定目录及其所有子目录中的文件数量。只需要一点帮助来入门即可。
单行写法(one-liner):
import os
# Total number of files under the tree: os.walk yields one (root, dirs, files)
# triple per directory, so summing len(files) covers every subdirectory.
# The path is a raw string because "\C" and "\P" are invalid escape sequences
# in a normal string literal.
cpt = sum(len(files) for _, _, files in os.walk(r"G:\CS\PYTHONPROJECTS"))
使用 os.walk,它会替你完成递归遍历。示例请参阅 http://www.pythonforbeginners.com/code-snippets-source-code/python-os-walk/。
# os.walk visits every directory beneath `folder`, so adding up the length of
# each per-directory file list gives the recursive file total.
total = sum(len(filenames) for _, _, filenames in os.walk(folder))
只需添加一个 elif 语句来处理目录:
def fileCount(folder):
    """Count the files in *folder* and, recursively, in its subdirectories.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The number of entries for which ``os.path.isfile`` is true, summed
        over *folder* and every directory nested below it.

    Raises:
        OSError: If *folder* (or a nested directory) cannot be listed.
    """
    count = 0
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if os.path.isfile(path):
            count += 1
        # os.path has no ``isfolder``; ``isdir`` is the directory test.
        elif os.path.isdir(path):
            count += fileCount(path)
    return count
使用 Path.cwd().rglob('*') 或 Path('some path').rglob('*') 创建一个生成器,它会遍历其下的所有条目(文件和目录)。
用 list 或 * 将生成器解包,然后使用 len 获取数量。
from pathlib import Path
# Start every scan from the current working directory.
here = Path.cwd()
# Materialise the rglob generator with list() to count every entry found ...
total_dir_files = len(list(here.rglob('*')))
# ... or unpack it into a list display with * for the same count
total_dir_files = len([*here.rglob('*')])
# ... or keep only the entries for which is_file() is true
file_count = sum(1 for entry in here.rglob('*') if entry.is_file())
以下是 3 种最流行方法的时间测试:
import os
from datetime import datetime
# Root directory that each of the timing runs below scans.
dir_path = "D:\\Photos"
# os.listdir
def recursive_call(dir_path):
    """Count files and folders below *dir_path* using ``os.listdir``.

    Args:
        dir_path: Path of the directory to scan.

    Returns:
        A ``(files, folders)`` tuple: the number of entries that are files
        and the number that are directories, accumulated over *dir_path* and
        every directory nested below it. *dir_path* itself is not counted.

    Raises:
        OSError: If a directory cannot be listed.
    """
    files = 0
    folders = 0
    for name in os.listdir(dir_path):
        # Build the full path once instead of re-joining it for every check.
        full_path = os.path.join(dir_path, name)
        if os.path.isfile(full_path):
            files += 1
        elif os.path.isdir(full_path):
            folders += 1
            file_count, folder_count = recursive_call(full_path)
            files += file_count
            folders += folder_count
    return files, folders
# Time the os.listdir-based recursion and report what it counted.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
elapsed = datetime.now() - start_time
print("Time Taken (os.listdir): %s seconds" % elapsed.total_seconds())
# os.walk
start_time = datetime.now()
files = 0
folders = 0
# os.walk yields one (root, dirs, files) triple per directory, including the
# top-level one. Counting the triples would therefore include dir_path itself;
# summing len(dirs) counts only the subfolders, which is what the recursive
# os.listdir / os.scandir runs report.
for _root, subdirs, filenames in os.walk(dir_path):
    files += len(filenames)
    folders += len(subdirs)
print("\nFolders: %d, Files: %d" % (folders, files))
print("Time Taken (os.walk): %s seconds" % (datetime.now() - start_time).total_seconds())
# os.scandir
def recursive_call(dir_path):
    """Count files and folders below *dir_path* using ``os.scandir``.

    Args:
        dir_path: Path (or path-like object) of the directory to scan.

    Returns:
        A ``(files, folders)`` tuple: the number of entries that are files
        and the number that are directories, accumulated over *dir_path* and
        every directory nested below it. *dir_path* itself is not counted.

    Raises:
        OSError: If a directory cannot be scanned.
    """
    files = 0
    folders = 0
    # The scandir iterator holds an open directory handle; the ``with`` block
    # closes it deterministically instead of leaving it to garbage collection.
    with os.scandir(dir_path) as entries:
        for entry in entries:
            if entry.is_file():
                files += 1
            elif entry.is_dir():
                folders += 1
                file_count, folder_count = recursive_call(entry.path)
                files += file_count
                folders += folder_count
    return files, folders
# Time the os.scandir-based recursion and report what it counted.
start_time = datetime.now()
files, folders = recursive_call(dir_path)
print("\nFolders: %d, Files: %d" % (folders, files))
elapsed = datetime.now() - start_time
print("Time Taken (os.scandir): %s seconds" % elapsed.total_seconds())
结果:
Folders: 53, Files: 29048
Time Taken (os.listdir): 3.074945 seconds
Folders: 53, Files: 29048
Time Taken (os.walk): 0.062022 seconds
Folders: 53, Files: 29048
Time Taken (os.scandir): 0.048984 seconds
结论:
虽然 os.walk 的写法最优雅,但基于 os.scandir 的递归实现似乎是最快的。
这是我的版本
def fileCount(folder, allowed_extensions=None):
    """Count the files in *folder* and all of its subdirectories.

    Args:
        folder: Path of the directory tree to scan with ``os.walk``.
        allowed_extensions: Optional filename suffix filter. May be a single
            string or any iterable of strings (tuple, list, set, ...). When
            omitted or empty, every file is counted.

    Returns:
        The number of files whose name ends with one of the allowed
        suffixes, or the number of all files when no filter is given.
    """
    # str.endswith only accepts a str or a tuple of str, so normalise other
    # iterables (e.g. a list) up front instead of failing inside the loop.
    if allowed_extensions and not isinstance(allowed_extensions, str):
        allowed_extensions = tuple(allowed_extensions)

    count = 0
    for _base, _dirs, files in os.walk(folder):
        for name in files:
            if not allowed_extensions or name.endswith(allowed_extensions):
                count += 1
    return count
# Example run: count only the .jpg and .mp4 files below the desktop folder.
allowed_extensions = (".jpg", ".mp4")
scan_dir = r"C:\Users\sannjayy\Desktop"
matching_files = fileCount(scan_dir, allowed_extensions)
print(matching_files)
这是我的实现,在我测试的 Windows 上运行良好。
from pprint import pprint # Optional
import pathlib
def count_folder_itens(folder, count=0):
    """Count the non-directory items below *folder* and map its subfolders.

    Args:
        folder: Path of the directory to scan.
        count: Unused; kept (now optional) so existing two-argument calls
            keep working.

    Returns:
        A ``(structure, file_count)`` tuple. ``structure`` is a list with one
        ``(subfolder_path, sub_structure)`` pair per direct subdirectory,
        nested recursively; ``file_count`` is the number of non-directory
        items in *folder* and everything below it. If a directory cannot be
        listed, a message is printed and ``([], 0)`` is returned for it.
    """
    path = pathlib.Path(folder)
    structure = []
    file_count = 0
    try:
        for item in path.iterdir():
            if item.is_dir():
                # Recurse, keeping both the subfolder's own structure and
                # its item count.
                subfolder, subfile_count = count_folder_itens(item, file_count)
                structure.append((str(item), subfolder))
                file_count += subfile_count
            else:
                # Anything that is not a directory is counted as a file.
                file_count += 1
    except (OSError, ValueError):
        # Missing, unreadable or malformed path: report it and contribute
        # nothing, but let unrelated programming errors propagate.
        print(f'Invalid folder path: {path}')
        return ([], 0)
    return (structure, file_count)
# Folder to scan. The backslashes are escaped in a normal string literal; the
# raw-string prefix is dropped because combining it with doubled backslashes
# puts two literal backslashes between every path component.
folder_path = 'C:\\Users\\wwwxkz\\Downloads\\'
# Call count_folder_itens starting with 0 items
result = count_folder_itens(folder_path, 0)
# Number of items in the folder and its subfolders
print(f'{folder_path}: {result[1]}')
# Print the whole folder structure recursively
pprint(result[0])  # Optional