我正在 PyCharm 中使用 pandas 拆分 Excel 文件。我的代码按键值拆分工作表,并能把文本正确复制到目标文件,但源文件的格式没有被复制。我希望拆分后的工作表能保留源文件的格式。示例文件和屏幕截图见原帖中的链接。
我的代码是:
from pathlib import Path

import pandas as pd

# Split one worksheet into one workbook per distinct value of the 'STD' column.
# NOTE: only cell values travel through the DataFrame; the source formatting
# is not written to the output files.
df = pd.read_excel(r'mypath\myfile.xlsx', sheet_name='Sheet2', engine='openpyxl')
target_folder = 'my target folder'

# Build output paths with pathlib so a separator is always placed between the
# folder and the file name (plain string concatenation produced
# "my target folder<key>.xlsx" next to the folder instead of inside it).
target_dir = Path(target_folder)
# Make sure the destination exists before writing into it.
target_dir.mkdir(parents=True, exist_ok=True)

keys = df.groupby('STD')
# Iterating the groupby yields (key, sub-frame) pairs directly, so no separate
# key lookup per group is needed.
for key, splitdf in keys:
    splitdf.to_excel(target_dir / f"{key}.xlsx", sheet_name=str(key), index=False)
print("Files created in " + str(target_folder) + "Folder")
这是 pandas 做不到的,你必须使用 openpyxl。
下面的代码使用了标准库的 itertools.groupby 进行分组,
并采纳了代码注释中所引用回答的建议。
from copy import copy
from itertools import groupby

from openpyxl import load_workbook

# Split the active worksheet into one sheet per distinct value found in the
# second column, carrying each cell's style and the column dimensions over,
# then save the result as a new workbook.
wb = load_workbook("myfile.xlsx")
ws = wb.active  # or wb["Sheet2"]

# ws.rows is consumed as an iterator: the first row is the header, the
# remaining rows are the data to be grouped.
data = ws.rows
# Keep (style, value) per cell so formatting can be re-applied on the copy.
# NOTE: _style is a private openpyxl attribute; it is used here because it
# bundles all style information of a cell in one object.
header = [(col._style, col.value) for col in next(data)]
pairs = [[(r._style, r.value) for r in rows] for rows in data]


def _std_key(row):
    """Return the grouping key of *row*: the value of its second cell."""
    return row[1][1]


# itertools.groupby only merges adjacent items, so sort by the same key first.
groups = {
    std: list(g) for std, g in groupby(sorted(pairs, key=_std_key), key=_std_key)
}

for std, vals in groups.items():
    # Sheet titles must be strings; write through the returned worksheet
    # object rather than looking the sheet up by title again, since the
    # title actually assigned may differ from the one requested.
    std_ws = wb.create_sheet(str(std))
    for idxr, rows in enumerate([header] + vals, 1):
        for idxc, (s, v) in enumerate(rows, 1):
            cell = std_ws.cell(idxr, idxc)
            cell.value = v
            # Copy the style so target cells do not share one style object
            # (the header style is reused on every new sheet).
            cell._style = copy(s)
    # Copy the column dimensions of the source sheet to each new sheet.
    # https://stackoverflow.com/a/52736133/16120011
    for idx, rd in ws.column_dimensions.items():
        std_ws.column_dimensions[idx] = copy(rd)

# Drop the source sheet so only the split sheets remain (could be
# unnecessary). Removing the worksheet object works whatever its title is.
wb.remove(ws)
wb.save("newfile.xlsx")
输出: