我正在使用带有 Python 的 Azure Function App 将 csv 转换为 xlsx。这工作正常,但是,StartDate 和 LeaveDate 列数据出现在列的左侧,这告诉我它不正确。然后,我需要对列执行“文本到列”操作,这将为我提供所需的输出。任何人都可以查看我的代码并提出一些建议来实现这一目标吗?谢谢罗斯
import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import numpy as np
import io
from io import BytesIO # Import for in-memory file object
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
def main(req: func.HttpRequest) -> func.HttpResponse:
logging.info('Python HTTP trigger function processed a request.')
connect_str = os.environ["AzureWebJobsStorage"]
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name_input = "input"
container_name_output = "output"
# Get container and blob name from request parameters (or modify for local testing)
container_name = req.params.get('container')
blob_name = req.params.get('name')
blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
blob_content = blob_client_input.download_blob().readall()
# Use BytesIO to wrap downloaded bytes
try:
df = pd.read_csv(io.BytesIO(blob_content))
except UnicodeDecodeError: # Handle potential encoding issues (optional)
# If you suspect encoding issues, try different encodings here
df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16') # Example for UTF-16
# Convert StartDate column to datetime format with the correct format string
df['StartDate'] = pd.to_datetime(df['StartDate'], format='%d/%m/%Y')
df['LeaveDate'] = pd.to_datetime(df['LeaveDate'], format='%d/%m/%Y')
# Format StartDate column to dd/mm/yyyy
df['StartDate'] = df['StartDate'].dt.strftime('%d/%m/%Y')
df['LeaveDate'] = df['LeaveDate'].dt.strftime('%d/%m/%Y')
output = BytesIO()
writer = pd.ExcelWriter(output, engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
# Get the xlsxwriter workbook and worksheet objects.
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Add a date format for the StartDate column.
date_format = workbook.add_format({'num_format': 'dd/mm/yyyy'})
# Apply the date format to the StartDate column.
worksheet.set_column('H:H', None, date_format)
worksheet.set_column('I:I', None, date_format)
# Use close() to finalize the Excel workbook
writer.close()
# Create output blob with correct extension
output_blob_name = blob_name.replace('.csv', '.xlsx')
excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
excel_blob_client.upload_blob(output.getvalue())
# Delete the input CSV file
blob_client_input.delete_blob()
return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")
与在 Excel 中执行文本到列操作相同
下面的代码对我有用。
import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import xlsxwriter
import io
from io import BytesIO
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)
@app.route(route="http_trigger")
def http_trigger(req: func.HttpRequest) -> func.HttpResponse:
logging.info('Python HTTP trigger function processed a request.')
connect_str = os.environ["StorageAccountConnectionString"]
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_name_output = "samples-workitems"
# Get container and blob name from request parameters (or modify for local testing)
container_name = req.params.get('container')
blob_name = req.params.get('blob')
blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
blob_content = blob_client_input.download_blob().readall()
# Use BytesIO to wrap downloaded bytes
try:
df = pd.read_csv(io.BytesIO(blob_content))
except UnicodeDecodeError: # Handle potential encoding issues (optional)
# If you suspect encoding issues, try different encodings here
df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16') # Example for UTF-16
# Convert StartDate column to datetime format with the correct format string
df['StartDate'] = pd.to_datetime(df['StartDate'], format='%d/%m/%Y')
df['LeaveDate'] = pd.to_datetime(df['LeaveDate'], format='%d/%m/%Y')
# Format StartDate column to dd/mm/yyyy
df['StartDate'] = df['StartDate'].dt.strftime('%d/%m/%Y')
df['LeaveDate'] = df['LeaveDate'].dt.strftime('%d/%m/%Y')
output = BytesIO()
# Create an instance of xlsxwriter.Workbook
workbook = xlsxwriter.Workbook(output, {'remove_timezone': True}) # Add options to remove timezone warning
worksheet = workbook.add_worksheet('Sheet1')
# Write column headers to the worksheet
for col_num, col_name in enumerate(df.columns):
worksheet.write(0, col_num, col_name)
# Write DataFrame rows to the worksheet starting from row 1 (excluding headers)
for row_num, row_data in enumerate(df.values, start=1):
for col_num, value in enumerate(row_data):
worksheet.write(row_num, col_num, value)
# Add a date format for the StartDate and LeaveDate columns.
date_format = workbook.add_format({'num_format': 'dd/mm/yyyy'})
worksheet.set_column('A:B', None, date_format)
# Use close() to finalize the Excel workbook
workbook.close()
# Create output blob with correct extension
output_blob_name = blob_name.replace('.csv', '.xlsx')
excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
excel_blob_client.upload_blob(output.getvalue())
# Delete the input CSV file
blob_client_input.delete_blob()
return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")
需求.txt-
azure-functions
azure-storage-blob
pandas
XlsxWriter
我能够通过执行给定的代码获得预期的响应。