我想添加一个功能:接收一个字符串,并在某个目录的所有子文件夹中搜索该字符串出现的位置。该目录中可能包含不同格式的文件,如 .xml、.java 等。有没有办法用 Python 实现这一点?另外,文件数量可能很多,是否可以借助某个库或特定算法来优化搜索?
我尝试过使用普通的 os 模块操作并逐个读取文件,但只能得到 .txt 文件的输出。
可以考虑为“仅搜索文件名”和“在文件内容中搜索”分别创建不同的函数:
import fnmatch
import os
from typing import List, Optional
def find_string_in_file_names(
    directory_path: str,
    search_string: str,
    extensions: Optional[List[str]] = None,
) -> None:
    """Print the path of every file whose name contains ``search_string``.

    The directory tree rooted at ``directory_path`` is traversed with
    ``os.walk``. Each matching file is reported exactly once on standard
    output as ``Found in file name: <path>``.

    Args:
        directory_path: The path to the directory to search in.
        search_string: The substring to look for in file names. The
            comparison uses ``in`` and is therefore case-sensitive.
        extensions: File-name suffixes to limit the search to, e.g.
            ``['.java', '.xml']``. Each entry is appended to ``*`` and
            matched with ``fnmatch``, so wildcard characters are honoured.
            If None, every file is considered, including files that have
            no extension. An empty list matches nothing.
    """
    # Use '*' rather than '*.*' for the default so that extension-less
    # files (e.g. "Makefile") are not silently excluded.
    if extensions is None:
        patterns = ['*']
    else:
        patterns = ['*' + extension for extension in extensions]

    for foldername, _, filenames in os.walk(directory_path):
        for filename in filenames:
            if search_string not in filename:
                continue
            # any() reports a file once even when several patterns match it.
            if any(fnmatch.fnmatch(filename, pattern) for pattern in patterns):
                print('Found in file name:', os.path.join(foldername, filename))
def find_string_in_files(
    directory_path: str,
    search_string: str,
    extensions: Optional[List[str]] = None,
) -> None:
    """Print the path of every file whose content contains ``search_string``.

    The directory tree rooted at ``directory_path`` is traversed with
    ``os.walk``. Each selected file is read line by line and reading stops
    at the first line containing ``search_string``; the file is then
    reported once on standard output as ``Found in file content: <path>``.

    Files are decoded as UTF-8 with undecodable bytes replaced, so files in
    other encodings or with stray binary bytes are still searched instead
    of being skipped with a decode error.

    Files that cannot be opened or read (``OSError``) are reported on
    standard output and skipped; no exception propagates to the caller for
    that case.

    Args:
        directory_path: The path to the directory to search in.
        search_string: The substring to look for (case-sensitive). Matches
            are detected within a single line only.
        extensions: File-name suffixes to limit the search to, e.g.
            ``['.java', '.xml']``. Each entry is appended to ``*`` and
            matched with ``fnmatch``, so wildcard characters are honoured.
            If None, every file is considered, including files that have
            no extension. An empty list matches nothing.
    """
    # Use '*' rather than '*.*' for the default so that extension-less
    # files (e.g. "Makefile") are not silently excluded.
    if extensions is None:
        patterns = ['*']
    else:
        patterns = ['*' + extension for extension in extensions]

    for foldername, _, filenames in os.walk(directory_path):
        for filename in filenames:
            # any() ensures a file matched by several patterns is scanned once.
            if not any(fnmatch.fnmatch(filename, pattern) for pattern in patterns):
                continue
            filepath = os.path.join(foldername, filename)
            try:
                # errors='replace' keeps the scan going past bytes that are
                # not valid UTF-8 instead of abandoning the whole file.
                with open(filepath, 'r', encoding='utf-8', errors='replace') as file:
                    # The generator short-circuits on the first matching line.
                    found = any(search_string in line for line in file)
            except OSError as e:
                print(f"Unable to open the file {filename}. Reason: {str(e)}")
                continue
            if found:
                print('Found in file content:', filepath)
# Usage: replace the placeholder directory and search string with real values.
# Guarded so that importing this module does not trigger a directory scan.
if __name__ == "__main__":
    find_string_in_file_names('/path/to/directory', 'search string', ['.java', '.xml'])
    find_string_in_files('/path/to/directory', 'search string', ['.java', '.xml'])