上下文:我正在编写代码来为我的工作创建一个土壤废物分类表。该表使用 excel 文件中的标准进行各种分类,并从 excel 文件中读取实验室结果。这有三种情况
我已经从场景 1 的函数中复制了代码,并尝试修改它以实现我想要的。问题是标准值和结果值现在没有填充 word 文档中的表格。还有关于使代码更简洁/更简洁的任何建议。我以前只学过一点 python,现在正尝试使用它来加快工作流程。
样本数量、分析物数量以及每次使用的分析物可能会有所不同,因此我需要让代码能够应用于不同的情况。
def same_file_waste_class(lab_results_file):
lab_results_file = file_path_entry.get()
if lab_results_file:
envirolab_results = pd.read_excel(
lab_results_file, sheet_name="Sheet1", header=None, index_col=None
)
# Drop TRIPLICATE row(s)
mask = (
envirolab_results.iloc[:, 2]
.astype(str)
.str.contains(r"\[TRIPLICATE\]", case=False, regex=True)
)
mask = mask.fillna(False) # Replace NaN values with False
envirolab_results1 = envirolab_results.loc[~mask]
# Drop Replicate row(s) - where Replicate = 1
envirolab_results2 = envirolab_results1[envirolab_results1.iloc[:, 4] != 1]
# Drop unneeded columns
columns_to_drop = [
"Date extracted",
"Date analysed",
"Date prepared",
"Reference",
"Description",
"Sample.1",
"Type of sample",
"Extraction fluid used",
]
mask = envirolab_results2.iloc[0].isin(columns_to_drop)
envirolab_results3 = envirolab_results2.drop(
columns=envirolab_results2.columns[mask]
)
# Drop Row at position 1 (empty row) and 4 (Method)
rows_to_drop = [1, 4]
envirolab_results4 = envirolab_results3.drop(
envirolab_results3.index[rows_to_drop]
)
# Rename first values in rows 1 and 2
envirolab_results4.iat[1, 0] = "Units"
envirolab_results4.iat[2, 0] = "PQL"
# Drop columbs at position 1 (Sample No.) and 2 (Replicate)
columns_to_drop = [1, 2]
envirolab_results5 = envirolab_results4.drop(
columns=envirolab_results4.columns[columns_to_drop]
)
# Replace unicode line break (_x000d_) with a space
envirolab_results5.iloc[[0]] = envirolab_results5.iloc[[0]].applymap(
lambda x: x.replace("_x000d_", " ") if isinstance(x, str) else x
)
# Split into two dataframes, for soil results and leach results
units_row = envirolab_results5.iloc[1] # sets the location of the Units row
leach_mask = units_row.isin(
["mg/L", "pH units"]
) # Create a boolean mask for leach test results
soil_mask = ~leach_mask # Create a boolean mask for soil results
# Select columns for each DataFrame based on the masks
soil_results = envirolab_results5.loc[:, soil_mask]
leach_results = envirolab_results5.loc[:, leach_mask]
sample_number_col = envirolab_results5.iloc[:, 0] # set sample number column
leach_results = pd.concat(
[sample_number_col, leach_results], axis=1
) # add sample number column to leach results df
soil_results = soil_results.reset_index(
drop=True
) # reset dataframe index to a default integer index
#### Change name of leach analytes to match soil results analytes
leach_results.iloc[0] = leach_results.iloc[0].str.replace(" in TCLP", "")
print(leach_results.head)
print(soil_results.head)
# Sum columns function
def sum_columns(df, columns, new_column, row_start):
df.loc[0, new_column] = new_column
df.loc[1, new_column] = df.loc[1, df.columns[df.iloc[0] == columns[0]][0]]
for i in range(row_start, len(df)):
float_values = []
lt_present = False
for col_name in columns:
col = df.columns[df.iloc[0] == col_name][0]
value = df.loc[i, col]
if "<" in value:
lt_present = True
value = value.replace("<", "")
float_values.append(float(value))
total = round(sum(float_values), 2)
if lt_present:
df.loc[i, new_column] = f"<{total}"
else:
df.loc[i, new_column] = total
# Set the columns to add for Scheduled chemicals using the sum_columns function
SC_columns_to_sum = [
"Aldrin",
"alpha-BHC",
"1,2,4-trichlorobenzene",
"HCB",
"beta-BHC",
"gamma-BHC",
"Heptachlor",
"delta-BHC",
"Heptachlor Epoxide",
"gamma-Chlordane",
"alpha-chlordane",
"pp-DDE",
"Dieldrin",
"Endrin",
"pp-DDD",
"Endrin Aldehyde",
"pp-DDT",
]
sum_columns(soil_results, SC_columns_to_sum, "Scheduled Chemicals", 2)
# Set the columns to add for Endosulfans (total) using the sum_columns function
ES_columns_to_sum = ["Endosulfan I", "Endosulfan II", "Endosulfan Sulphate"]
sum_columns(soil_results, ES_columns_to_sum, "Endosulfans (total)", 2)
### Extract Data
sample_numbers = soil_results.iloc[3:, 0].tolist()
analyte_names = soil_results.iloc[0, 1:].tolist()
results = soil_results.iloc[3:, 1:].values.tolist()
# Load criteria sheet
criteria_file = "Criterias.xlsx"
criteria_df = pd.read_excel(criteria_file, sheet_name="Sheet1", index_col=None)
# Print the criteria DataFrame to verify the data is read correctly
# print(criteria_df)
# print(soil_results.head(50))
# Get the list of analyte names in the soil results
soil_analyte_names = soil_results.iloc[0].tolist()
# Get the list of analyte names in the criteria DataFrame
criteria_analyte_names = criteria_df.index.tolist()
# Find the common analytes between the two lists
soil_common_analytes = list(
set(soil_analyte_names).intersection(criteria_analyte_names)
)
soil_analyte_positions = {
analyte: idx for idx, analyte in enumerate(criteria_analyte_names)
}
soil_common_analytes = sorted(soil_common_analytes, key=lambda x: soil_analyte_positions[x])
##################################################################### Added these just to easily find this spot again as this is where i started to change the previous working function
# Get the list of analyte names in the soil results
leach_analyte_names = leach_results.iloc[0].tolist()
# Get the list of analyte names in the criteria DataFrame
criteria_analyte_names = criteria_df.index.tolist()
# Find the common analytes between the two lists
leach_common_analytes = list(
set(leach_analyte_names).intersection(criteria_analyte_names)
)
leach_analyte_positions = {
analyte: idx for idx, analyte in enumerate(criteria_analyte_names)
}
leach_common_analytes = sorted(leach_common_analytes, key=lambda x: leach_analyte_positions[x])
#####################################################################
# Calculate the number of rows and columns needed in the table
num_samples = len(sample_numbers)
num_analytes = len(soil_common_analytes)
num_columns = 6 + 2 * num_samples
num_rows = 6 + num_analytes
def should_highlight_cell(result, criteria):
# Ignore cells with a "-" sign
if result == "-":
return False
if result.startswith("<"):
result_value = float(result[1:])
else:
result_value = float(result)
if isinstance(criteria, str) and criteria.startswith("<"):
criteria_value = float(criteria[1:])
else:
criteria_value = float(criteria)
return result_value > criteria_value
def shade_cell(cell, hex_color):
tcPr = cell._element.tcPr
if tcPr is None:
tcPr = OxmlElement("w:tcPr")
cell._element.append(tcPr)
shading_elm = OxmlElement("w:shd")
shading_elm.set(qn("w:fill"), hex_color)
tcPr.append(shading_elm)
# Step 3: Create a Word document
doc = Document()
# Change the page orientation to landscape
section = doc.sections[0]
section.page_width, section.page_height = (
section.page_height,
section.page_width,
)
# create table
table = doc.add_table(rows=num_rows, cols=num_columns)
# Step 4: Populate the table
# Add headers and static values
table.cell(0, 5).text = "Sample Number"
table.cell(1, 5).text = "Sample Type"
table.cell(2, 5).text = "Sample Location"
table.cell(3, 5).text = "Sample Depth from Surface (m)"
table.cell(4, 0).text = "Units"
table.cell(4, 1).text = "mg/kg"
table.cell(4, 2).text = "mg/kg"
table.cell(4, 3).text = "mg/kg"
table.cell(4, 4).text = "mg/L"
table.cell(4, 5).text = "mg/kg"
table.cell(5, 5).text = "PQL (Without Leach TCLP Test)"
table.cell(2, 1).text = "WITHOUT Leach (TCLP) Test"
table.cell(2, 3).text = "WITH Leach (TCLP) Test"
table.cell(3, 1).text = "General Solid Waste"
table.cell(3, 2).text = "Restricted Solid Waste"
table.cell(3, 3).text = "General Solid Waste"
table.cell(5, 0).text = "ANALYTE"
table.cell(5, 1).text = "CT1"
table.cell(5, 2).text = "CT2"
table.cell(5, 3).text = "SCC1"
table.cell(5, 4).text = "TCLP1"
# Add sample numbers
for i, sample_number in enumerate(sample_numbers):
table.cell(0, 6 + 2 * i).text = str(sample_number) # Soil result column
table.cell(0, 6 + 2 * i + 1).text = (
str(sample_number) + " (Leach)"
) # Leach result column
# Add units for the results columns
for i in range(num_samples):
table.cell(4, 6 + 2 * i).text = "mg/kg" # Soil result column
table.cell(4, 6 + 2 * i + 1).text = "mg/L" # Leach result column
# Add results and highlight cells if they exceed the CT1 criteria
for row_idx, analyte in enumerate(soil_common_analytes):
# Add analyte name
table.cell(6 + row_idx, 0).text = analyte
# Get the corresponding row in the soil_results DataFrame
analyte_col_index_soil = soil_results.columns.get_loc(
soil_results.columns[soil_results.iloc[0] == analyte][0]
)
analyte_col_index_leach = leach_results.columns.get_loc(
leach_results.columns[leach_results.iloc[0] == analyte][0]
)
analyte_data_soil = soil_results.iloc[3:, analyte_col_index_soil]
analyte_data_leach = leach_results.iloc[3:, analyte_col_index_leach]
# Add criteria values
for col_idx, col in enumerate(["CT1", "CT2", "SCC1", "TCLP1", "PQL"]):
value = criteria_df.loc[analyte, col]
table.cell(6 + row_idx, 1 + col_idx).text = str(value)
# Add soil and leach results
for i in range(num_samples):
value_soil = analyte_data_soil.iloc[i]
value_leach = analyte_data_leach.iloc[i]
# Soil result
result_cell_soil = table.cell(6 + row_idx, 6 + 2 * i)
result_cell_soil.text = str(value_soil)
# Leach result
result_cell_leach = table.cell(6 + row_idx, 6 + 2 * i + 1)
result_cell_leach.text = str(value_leach)
ct1_value = criteria_df.loc[analyte, "CT1"]
if should_highlight_cell(str(value_soil), str(ct1_value)):
shade_cell(result_cell_soil, "#FFAFAF") # dull red colour
if should_highlight_cell(str(value_leach), str(ct1_value)):
shade_cell(result_cell_leach, "#FFAFAF") # dull red colour
# Merge cells
table.cell(2, 1).merge(table.cell(2, 2))
table.cell(2, 3).merge(table.cell(2, 4))
table.cell(3, 3).merge(table.cell(3, 4))
# Apply the 'Table Grid' style
table.style = "Table Grid"
def set_bold(row, start_col, end_col):
for i in range(start_col, end_col + 1):
cell = row.cells[i]
if len(cell.paragraphs) > 0:
paragraph = cell.paragraphs[0]
if len(paragraph.runs) > 0:
run = paragraph.runs[0]
else:
run = paragraph.add_run()
run.font.bold = True
run.font.name = "Arial"
run.font.size = Pt(8)
else:
paragraph = cell.add_paragraph()
run = paragraph.add_run()
run.font.bold = True
run.font.name = "Arial"
run.font.size = Pt(8)
# Shading cells
shade_color_1 = "#BFBFBF"
shade_color_2 = "#D9D9D9"
# Shade cells from cell(0, 0) to cell(5, 5) and cell(4, 6) to the end of row 4
for row_idx in range(6):
for col_idx in range(6):
shade_cell(table.cell(row_idx, col_idx), shade_color_1)
if row_idx == 4:
for col_idx in range(6, num_samples + 6):
shade_cell(table.cell(row_idx, col_idx), shade_color_1)
# Shade cells containing the analyte names and the criteria values
for row_idx in range(6, 6 + len(soil_common_analytes)):
for col_idx in range(6):
shade_cell(table.cell(row_idx, col_idx), shade_color_2)
# Set text in rows 0 to 5 to Bold
for row_idx in range(6):
row = table.rows[row_idx]
set_bold(row, 0, num_samples + 5)
def set_font_and_align(cell, font_name, font_size):
for paragraph in cell.paragraphs:
for run in paragraph.runs:
run.font.name = font_name
run.font.size = Pt(font_size)
paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
# Set vertical alignment
tcPr = cell._element.tcPr
if tcPr is None:
tcPr = OxmlElement("w:tcPr")
cell._element.append(tcPr)
vAlign = OxmlElement("w:vAlign")
vAlign.set(qn("w:val"), "center")
tcPr.append(vAlign)
# Apply font settings and alignment to all cells
for row_idx in range(len(table.rows)):
for col_idx in range(len(table.columns)):
set_font_and_align(table.cell(row_idx, col_idx), "Arial", 8)
def set_row_height(row, height_cm):
height_twips = int(height_cm * 28.35 * 20) # Convert cm to twips
tr = row._tr
trPr = tr.get_or_add_trPr()
trHeight = OxmlElement("w:trHeight")
trHeight.set(qn("w:val"), str(height_twips))
trHeight.set(qn("w:hRule"), "atLeast")
trPr.append(trHeight)
def set_column_width(column, width_cm):
width_twips = int(width_cm * 28.35 * 20) # Convert cm to twips
for cell in column.cells:
cell.width = width_twips
cell._element.tcPr.tcW.type = "dxa"
cell._element.tcPr.tcW.w = width_twips
# Set the width of column 6 (position 5) to 2.79 cm
set_column_width(table.columns[5], 2.79)
# Set the height for each row
height_cm = 0.53
for row in table.rows:
set_row_height(row, height_cm)
# Save the Word document
doc.save("output_table.docx")
os.system("start output_table.docx")
word doc中的表格除了criteria values, soil result values和leach result values外都正确输出了