更新 Python 代码以使用 Azure Function App 执行“文本到列”操作

问题描述 投票:0回答:1

我正在使用基于 Python 的 Azure Function App 将 csv 转换为 xlsx。转换本身工作正常,但是 StartDate 和 LeaveDate 两列的数据在单元格中靠左对齐,这说明它们被存储为文本而不是真正的日期。目前我还需要在 Excel 中对这两列手动执行“文本到列”(Text to Columns,即“分列”)操作,才能得到所需的输出。有人可以查看我的代码,并建议如何在代码中直接实现这一点吗?谢谢,Ross

import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import numpy as np
import io
from io import BytesIO  # Import for in-memory file object


# Azure Functions app object, created with http_auth_level set to ANONYMOUS.
# NOTE(review): `main` below carries no `@app.route` decorator in this snippet,
# so it does not appear to be registered on this app — confirm against the full project.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Convert a CSV blob to an XLSX blob and delete the source CSV.

    Reads the `container` and `name` query parameters, downloads that blob,
    loads it with pandas, writes it to an in-memory workbook through the
    xlsxwriter engine, uploads the result to the "output" container and then
    deletes the input blob.

    Returns:
        An HttpResponse whose body names the created XLSX blob.

    NOTE(review): the date columns are converted back to strings before the
    workbook is written (see the strftime calls below), so they reach Excel
    as text rather than as date cells.
    """
    logging.info('Python HTTP trigger function processed a request.')

    # Storage connection string comes from the AzureWebJobsStorage app setting;
    # a missing setting raises KeyError here.
    connect_str = os.environ["AzureWebJobsStorage"]
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    # NOTE(review): container_name_input is assigned but never read in this
    # function; the input container actually comes from the query string.
    container_name_input = "input"
    container_name_output = "output"
   
    # Get container and blob name from request parameters (or modify for local testing).
    # Both are None when the parameter is absent; no validation is done here.
    container_name = req.params.get('container')
    blob_name = req.params.get('name')

    blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    blob_content = blob_client_input.download_blob().readall()

    # Wrap the downloaded bytes in BytesIO so pandas can read them like a file.
    try:
        df = pd.read_csv(io.BytesIO(blob_content))
    except UnicodeDecodeError:  # Handle potential encoding issues (optional)
        # Fallback: retry the parse assuming the CSV is UTF-16 encoded.
        df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16')  # Example for UTF-16

    # Parse StartDate and LeaveDate as dd/mm/yyyy into datetime values.
    df['StartDate'] = pd.to_datetime(df['StartDate'], format='%d/%m/%Y')
    df['LeaveDate'] = pd.to_datetime(df['LeaveDate'], format='%d/%m/%Y')

    # Format both columns as dd/mm/yyyy.
    # NOTE(review): .dt.strftime yields strings, so from here on both columns
    # hold text again; this undoes the datetime conversion above and is the
    # likely reason the values show up left-aligned (as text) in Excel.
    df['StartDate'] = df['StartDate'].dt.strftime('%d/%m/%Y')
    df['LeaveDate'] = df['LeaveDate'].dt.strftime('%d/%m/%Y')

    # Build the workbook in memory rather than on disk.
    output = BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    df.to_excel(writer, index=False, sheet_name='Sheet1')

    # Get the xlsxwriter workbook and worksheet objects.
    workbook  = writer.book
    worksheet = writer.sheets['Sheet1']

    # Add a date number format intended for the StartDate and LeaveDate columns.
    date_format = workbook.add_format({'num_format': 'dd/mm/yyyy'})

    # Apply the date format to columns H and I (width left unchanged via None).
    # NOTE(review): assumes StartDate/LeaveDate are the 8th and 9th columns of
    # the CSV — TODO confirm. A number format presumably has no visible effect
    # on cells that contain strings.
    worksheet.set_column('H:H', None, date_format)
    worksheet.set_column('I:I', None, date_format)

    # Use close() to finalize the Excel workbook
    writer.close()

    # Derive the output blob name by replacing '.csv' with '.xlsx' in the input name.
    output_blob_name = blob_name.replace('.csv', '.xlsx')
    excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
    excel_blob_client.upload_blob(output.getvalue())

    # Delete the input CSV file (only reached if the upload above did not raise).
    blob_client_input.delete_blob()

    return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")

期望的结果:与在 Excel 中对这两列手动执行“文本到列”操作后的效果相同。

python excel azure function
1个回答
0
投票

下面的代码对我有用。

import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import xlsxwriter
import io
from io import BytesIO 

# Azure Functions app object, created with http_auth_level set to ANONYMOUS;
# `http_trigger` below is registered on it through the `@app.route` decorator.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

@app.route(route="http_trigger")
def http_trigger(req: func.HttpRequest) -> func.HttpResponse:
    """Convert a CSV blob to an XLSX blob whose date columns are real Excel dates.

    Query parameters:
        container: name of the container holding the source CSV.
        blob: name of the source CSV blob.

    The StartDate and LeaveDate columns are parsed as dd/mm/yyyy and written
    as date cells (not text), so no manual "Text to Columns" step is needed
    in Excel afterwards. The workbook is uploaded to the "samples-workitems"
    container and the source CSV is deleted once the upload has succeeded.

    Returns:
        200 with a confirmation message on success; 400 when a query
        parameter or one of the expected date columns is missing.

    Raises:
        KeyError: if the StorageAccountConnectionString setting is absent.
        ValueError: if a date value does not match dd/mm/yyyy.
    """
    logging.info('Python HTTP trigger function processed a request.')

    # Fail fast with a client error instead of crashing later on a None name.
    container_name = req.params.get('container')
    blob_name = req.params.get('blob')
    if not container_name or not blob_name:
        return func.HttpResponse(
            "Query parameters 'container' and 'blob' are required.",
            status_code=400,
        )

    connect_str = os.environ["StorageAccountConnectionString"]
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_name_output = "samples-workitems"

    blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    blob_content = blob_client_input.download_blob().readall()

    # Wrap the downloaded bytes so pandas can read them like a file; retry as
    # UTF-16 when the default decoding fails.
    try:
        df = pd.read_csv(io.BytesIO(blob_content))
    except UnicodeDecodeError:
        df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16')

    date_columns = ('StartDate', 'LeaveDate')
    missing_columns = [name for name in date_columns if name not in df.columns]
    if missing_columns:
        return func.HttpResponse(
            f"CSV is missing expected date column(s): {', '.join(missing_columns)}.",
            status_code=400,
        )

    # Keep the parsed values as datetimes. Converting them back to strings
    # with strftime would make Excel store them as text, which is exactly the
    # left-aligned "not a real date" symptom this function is meant to fix.
    for name in date_columns:
        df[name] = pd.to_datetime(df[name], format='%d/%m/%Y')

    output = BytesIO()
    # The context manager closes (finalizes) the workbook even if writing fails.
    with xlsxwriter.Workbook(output, {'remove_timezone': True}) as workbook:
        worksheet = workbook.add_worksheet('Sheet1')
        date_format = workbook.add_format({'num_format': 'dd/mm/yyyy'})

        # Write headers and locate the date columns by name rather than
        # assuming fixed spreadsheet positions.
        date_col_indexes = set()
        for col_num, col_name in enumerate(df.columns):
            worksheet.write(0, col_num, col_name)
            if col_name in date_columns:
                date_col_indexes.add(col_num)
                # Width 12 so a dd/mm/yyyy date is not rendered as "####".
                worksheet.set_column(col_num, col_num, 12, date_format)

        # Write data rows starting at row 1 (row 0 holds the headers).
        for row_num, row in enumerate(df.itertuples(index=False, name=None), start=1):
            for col_num, value in enumerate(row):
                if pd.isna(value):
                    # Leave empty CSV cells blank; XlsxWriter rejects NaN/NaT.
                    continue
                if col_num in date_col_indexes:
                    worksheet.write_datetime(row_num, col_num, value.to_pydatetime(), date_format)
                else:
                    worksheet.write(row_num, col_num, value)

    # Swap only a trailing .csv extension (any case); otherwise append .xlsx
    # so the output always carries the correct extension.
    stem, extension = os.path.splitext(blob_name)
    output_blob_name = (stem if extension.lower() == '.csv' else blob_name) + '.xlsx'
    excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
    excel_blob_client.upload_blob(output.getvalue())

    # Delete the input CSV only after the upload above has succeeded.
    blob_client_input.delete_blob()

    return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")

requirements.txt:

azure-functions
azure-storage-blob
pandas
XlsxWriter

我能够通过执行给定的代码获得预期的响应。

enter image description here

enter image description here

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.