Python 脚本不会重命名文件夹中最后一个 .xlsx 文件

问题描述 投票:0回答:1

我正在尝试通过 Python 重命名文件夹中的一系列 .xlsx 格式的 excel 文件。要重命名它们,我需要首先从每个工作簿中获取一些信息。我在执行此操作时遇到了麻烦,因为一旦我获得信息,我就无法重命名,因为该文件“正在被另一个进程使用”,所以我构建了这个解决方法。它似乎有效,除了文件夹中的最后一个文件总是给我同样的错误,即该文件“正在被另一个进程使用”。

import os
from openpyxl import load_workbook
import datetime

def extract_new_file_names(folder_path):
    """Map every ``.xlsx`` file in *folder_path* to its computed new name.

    Args:
        folder_path: Directory whose direct entries are scanned
            (sub-directories are not descended into).

    Returns:
        A dict whose keys are the full paths of the ``.xlsx`` files and
        whose values are whatever ``extract_file_info`` returns for them.
    """
    workbook_paths = (
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith(".xlsx")
    )
    return {path: extract_file_info(path) for path in workbook_paths}

def extract_file_info(file_path):
    """Build the new file name for a workbook from its own contents.

    Reads cell B4 (a date) and cell D6 (the GL text) from the active sheet
    and combines them with the acronym taken from the workbook's file name
    (the part before the first underscore).

    Args:
        file_path: Path to the ``.xlsx`` workbook.

    Returns:
        A bare file name (no directory part) of the form
        ``"<acronym>-<mm-dd-yy> <bank account> recon.xlsx"``.

    Raises:
        ValueError: If B4 holds text that is not in ``%m/%d/%Y`` format.
        IndexError: If the D6 text does not contain ``"- "``.
    """
    wb = load_workbook(filename=file_path, read_only=True)
    try:
        sheet = wb.active
        date_cell = sheet['B4'].value  # Get date from cell B4
        gl_text = sheet['D6'].value  # Get GL text from cell D6
    finally:
        # A read-only workbook must be closed explicitly; doing it in
        # ``finally`` releases the file even when a cell read fails, so a
        # later rename is not blocked by our own open handle.
        wb.close()

    # Use only the file name: splitting the full path would cut at an
    # underscore inside a directory name and drag the directory into the
    # result.
    acronym = os.path.basename(file_path).split("_")[0]

    # Date-formatted cells come back as date/datetime objects, text cells
    # as strings; accept both.
    if isinstance(date_cell, datetime.date):
        parsed_date = date_cell
    else:
        parsed_date = datetime.datetime.strptime(date_cell, "%m/%d/%Y")
    formatted_date = parsed_date.strftime("%m-%d-%y")

    # Keep everything after the first "- " separator of the GL text.
    bank_account = gl_text.split("- ", 1)[1]
    return f"{acronym}-{formatted_date} {bank_account} recon.xlsx"

def rename_files(new_file_names):
    """Rename every file listed in *new_file_names*.

    Args:
        new_file_names: Mapping of existing file path -> new file name.
            Each pair is handed to ``rename_file`` in mapping order.
    """
    for path_and_name in new_file_names.items():
        rename_file(*path_and_name)

def rename_file(old_file_path, new_filename):
    """Rename a single file inside its current directory.

    The outcome is reported on stdout; any exception raised while renaming
    is caught and printed rather than propagated.

    Args:
        old_file_path: Path of the file to rename.
        new_filename: New name, joined onto the directory of
            *old_file_path*.
    """
    try:
        parent_dir = os.path.dirname(old_file_path)
        destination = os.path.join(parent_dir, new_filename)
        os.rename(old_file_path, destination)
        print(f"File {old_file_path} renamed to {new_filename}")
    except Exception as e:
        print(f"Failed to rename file {old_file_path}: {e}")

# Script driver: runs at import time (there is no __main__ guard).
# The work is done in two passes: first every workbook is read and its new
# name computed, then the renames are performed.
folder_path = "C:\\Folder\\Path\\Is\\Here"
new_file_names = extract_new_file_names(folder_path)
rename_files(new_file_names)

我不明白为什么只有最后一个文件会出错,就好像 Excel 仍在运行一样。我尝试将 rename_file 函数修改为:

def rename_file(old_file_path, new_filename):
    """Kill Excel processes, then rename a file inside its directory.

    Every process yielded by ``psutil.process_iter()`` whose name contains
    ``"EXCEL.EXE"`` is killed before the rename is attempted. The rename
    outcome is printed; exceptions raised by the rename are caught and
    printed rather than propagated.

    Args:
        old_file_path: Path of the file to rename.
        new_filename: New name, joined onto the directory of
            *old_file_path*.
    """
    for process in psutil.process_iter():
        if "EXCEL.EXE" not in process.name():
            continue
        process.kill()
    try:
        parent_dir = os.path.dirname(old_file_path)
        destination = os.path.join(parent_dir, new_filename)
        os.rename(old_file_path, destination)
        print(f"File {old_file_path} renamed to {new_filename}")
    except Exception as e:
        print(f"Failed to rename file {old_file_path}: {e}")

但奇怪的是,它只重命名了最后一个文件,而其他所有文件都报了错。我算不上真正意义上的程序员,只是想把目前每个月对我们来说非常漫长而乏味的流程自动化。如有任何帮助,我将不胜感激。

python excel openpyxl
1个回答
0
投票
# Directory that the __main__ block walks (recursively) looking for .xlsx files.
DATA_DIR = '/Users/path/to/xlsx_dir'
# Presumably meant as the output_dir argument of rename_xlsx_files; it is not
# referenced anywhere else in the visible code -- TODO confirm.
OUTPUT_DIR = '/Users/path/to/xlsx_output_dir'

import os
import pandas as pd
from datetime import datetime

def extract_file_info(dataframe: pd.DataFrame, file_path: str) -> str:
    """Build the new file name for a workbook from its loaded data.

    A DataFrame cannot be addressed with Excel cell references such as
    ``'B4'``, so the two cells are looked up by position instead.

    Args:
        dataframe: Sheet contents as returned by ``pd.read_excel`` with its
            default header handling, i.e. spreadsheet row 1 became the
            column header and spreadsheet row 2 is positional row 0.
            NOTE(review): if the sheet is read with ``header=None`` the
            row offsets below must each be increased by one.
        file_path: Path of the workbook; only its file name is used, to
            obtain the acronym (the part before the first underscore).

    Returns:
        A bare file name of the form
        ``"<acronym>-<mm-dd-yy> <bank account> recon.xlsx"``.

    Raises:
        IndexError: If the frame is too small to contain B4/D6, or the GL
            text does not contain ``"- "``.
        ValueError: If the date is text that is not in ``%m/%d/%Y`` format.
    """
    # Use the file name only: with a full path, an absolute prefix would
    # make a later os.path.join(output_dir, name) ignore output_dir.
    acronym = os.path.basename(file_path).split("_")[0]

    # Excel B4 -> spreadsheet row 4, column B -> positional [2, 1].
    date_cell = dataframe.iloc[2, 1]
    # Excel D6 -> spreadsheet row 6, column D -> positional [4, 3].
    gl_text = dataframe.iloc[4, 3]

    # Date cells may arrive as text or as an already-parsed timestamp.
    if isinstance(date_cell, str):
        parsed_date = datetime.strptime(date_cell, "%m/%d/%Y")
    else:
        parsed_date = pd.Timestamp(date_cell)
    formatted_date = parsed_date.strftime("%m-%d-%y")

    # Keep everything after the first "- " separator of the GL text.
    bank_account = gl_text.split("- ", 1)[1]
    return f"{acronym}-{formatted_date} {bank_account} recon.xlsx"

def rename_xlsx_files(data_dir: str, output_dir: str) -> None:
    """Save a renamed copy of every ``.xlsx`` file found under *data_dir*.

    Each workbook is loaded with ``pd.read_excel``, its new name is derived
    by ``extract_file_info``, and the loaded data is written to
    *output_dir* under that name with ``DataFrame.to_excel``. The original
    files are left untouched. Because the copy is re-written from the
    DataFrame, only the data that ``read_excel`` loaded is carried over.

    A failure on one workbook (unreadable file, missing cells, write
    error) is printed and does not stop the remaining files from being
    processed.

    Note: verify the data frame layout and cell values before relying on
    the generated names.

    Args:
        data_dir: Directory walked recursively for ``.xlsx`` files.
        output_dir: Directory that receives the renamed copies; it is
            created if it does not exist.
    """
    # An empty output_dir means "current directory"; makedirs("") would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            if not file.endswith('.xlsx'):
                continue
            file_path = os.path.join(root, file)
            try:
                # Reading is inside the try so that a corrupt or locked
                # workbook is reported like any other per-file failure.
                df = pd.read_excel(file_path)
                new_file_name = extract_file_info(df, file_path)
                new_file_path = os.path.join(output_dir, new_file_name)
                df.to_excel(new_file_path, index=False)
            except Exception as e:
                print(f"Error occurred while processing {file_path}: {e}")


# Inspection driver: prints the layout of workbooks under DATA_DIR so the
# column names and values can be checked by eye. It does not call
# rename_xlsx_files.
if __name__ == "__main__":
    for root, dirs, files in os.walk(DATA_DIR):
        for file in files:
            if file.endswith('.xlsx'):
                file_path = os.path.join(root, file)
                df = pd.read_excel(file_path)
                print(f"Processing {file_path}...")
                # First rows and the column labels of the loaded sheet.
                print(df.head())
                print("\n")
                print(df.columns)
                print("\n")
                # Assumes the sheet has a column labelled 'Date'; a KeyError
                # is raised otherwise.
                print(df['Date'].values)
                print("\n")
                print(df['Date'].values[0])
            # The break sits outside the `if`, so only the first entry of
            # each directory is examined, whether or not it is an .xlsx file.
            # NOTE(review): possibly meant to stop after the first .xlsx -- confirm.
            break

我尝试复制您的场景,希望能成功,请告诉我它是否对您有任何帮助。

© www.soinside.com 2019 - 2024. All rights reserved.