更新 Python 代码以使用 Azure Function App 执行“文本到列”操作

问题描述 投票:0回答:1

我正在使用基于 Python 的 Azure Function App 将 csv 转换为 xlsx。转换本身工作正常,但是 StartDate 和 LeaveDate 列的数据在单元格中靠左对齐,这说明它们没有被识别为日期(而是被当作文本)。目前我需要在 Excel 中对这两列手动执行“文本到列”(分列)操作,才能得到所需的输出。有人可以查看我的代码,并建议如何在代码中实现这一点吗?谢谢,Ross

import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import numpy as np
import io
from io import BytesIO  # Import for in-memory file object

# Function app instance; the HTTP auth level is set to anonymous.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

def main(req: func.HttpRequest) -> func.HttpResponse:
    """Convert a CSV blob to XLSX, writing StartDate/LeaveDate as real dates.

    The CSV is downloaded from the container/blob named by the ``container``
    and ``name`` query parameters, converted in memory, uploaded to the
    ``output`` container with an ``.xlsx`` extension, and the input blob is
    then deleted.

    The date columns are kept as datetimes (not formatted back to strings) so
    that Excel stores them as dates displayed as ``dd/mm/yyyy``. Writing them
    as strings is what made a manual "Text to Columns" step necessary.

    NOTE(review): no ``@app.route`` decorator is visible on this function;
    confirm how it is registered with the function app.

    Args:
        req: Incoming HTTP request carrying ``container`` and ``name``.

    Returns:
        A 200 response naming the created XLSX blob, or a 400 response when
        a required query parameter is missing.

    Raises:
        KeyError: If ``AzureWebJobsStorage`` is not set, or a date column is
            absent from the CSV.
        ValueError: If a date value does not match ``%d/%m/%Y``.
    """
    logging.info('Python HTTP trigger function processed a request.')

    container_name = req.params.get('container')
    blob_name = req.params.get('name')
    if not container_name or not blob_name:
        # Fail early with a clear message instead of an SDK error on None.
        return func.HttpResponse(
            "Please pass both 'container' and 'name' query parameters.",
            status_code=400,
        )

    connect_str = os.environ["AzureWebJobsStorage"]
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_name_output = "output"

    blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    blob_content = blob_client_input.download_blob().readall()

    # Use BytesIO to wrap the downloaded bytes; fall back to UTF-16 when the
    # default decoding fails.
    try:
        df = pd.read_csv(io.BytesIO(blob_content))
    except UnicodeDecodeError:
        df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16')

    # Parse the date columns and KEEP them as datetimes: converting back to
    # strings would make Excel store text instead of dates.
    date_columns = ('StartDate', 'LeaveDate')
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], format='%d/%m/%Y')

    output = BytesIO()
    # The context manager closes (finalizes) the workbook on exit; the
    # date/datetime formats control how date cells are displayed.
    with pd.ExcelWriter(
        output,
        engine='xlsxwriter',
        date_format='dd/mm/yyyy',
        datetime_format='dd/mm/yyyy',
    ) as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')
        worksheet = writer.sheets['Sheet1']

        # Locate the date columns by name rather than hard-coded letters and
        # widen them so the dates are not rendered as '####'.
        column_names = list(df.columns)
        for column in date_columns:
            col_idx = column_names.index(column)
            worksheet.set_column(col_idx, col_idx, 12)

    # Create the output blob name with the correct extension; only a trailing
    # '.csv' is replaced so other parts of the name are left untouched.
    if blob_name.lower().endswith('.csv'):
        output_blob_name = blob_name[:-len('.csv')] + '.xlsx'
    else:
        output_blob_name = blob_name + '.xlsx'
    excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
    excel_blob_client.upload_blob(output.getvalue(), overwrite=True)

    # Delete the input CSV file only after the XLSX upload has succeeded.
    blob_client_input.delete_blob()

    return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")

期望的效果与在 Excel 中手动执行“文本到列”操作相同。

python excel azure function


import azure.functions as func
import logging
from azure.storage.blob import BlobServiceClient
import os
import pandas as pd
import xlsxwriter
import io
from io import BytesIO 

# Function app instance; the HTTP auth level is set to anonymous.
app = func.FunctionApp(http_auth_level=func.AuthLevel.ANONYMOUS)

def http_trigger(req: func.HttpRequest) -> func.HttpResponse:
    """Convert a CSV blob to XLSX, writing StartDate/LeaveDate as real dates.

    The CSV is downloaded from the container/blob named by the ``container``
    and ``blob`` query parameters, written to an in-memory workbook with
    xlsxwriter, uploaded to the ``samples-workitems`` container with an
    ``.xlsx`` extension, and the input blob is then deleted.

    Date cells are written with ``write_datetime`` and a ``dd/mm/yyyy``
    number format, so Excel stores them as dates rather than text and no
    manual "Text to Columns" step is needed.

    NOTE(review): no ``@app.route`` decorator is visible on this function;
    confirm how it is registered with the function app.

    Args:
        req: Incoming HTTP request carrying ``container`` and ``blob``.

    Returns:
        A 200 response naming the created XLSX blob, or a 400 response when
        a required query parameter is missing.

    Raises:
        KeyError: If ``StorageAccountConnectionString`` is not set, or a date
            column is absent from the CSV.
        ValueError: If a date value does not match ``%d/%m/%Y``.
    """
    logging.info('Python HTTP trigger function processed a request.')

    container_name = req.params.get('container')
    blob_name = req.params.get('blob')
    if not container_name or not blob_name:
        # Fail early with a clear message instead of an SDK error on None.
        return func.HttpResponse(
            "Please pass both 'container' and 'blob' query parameters.",
            status_code=400,
        )

    connect_str = os.environ["StorageAccountConnectionString"]
    blob_service_client = BlobServiceClient.from_connection_string(connect_str)
    container_name_output = "samples-workitems"

    blob_client_input = blob_service_client.get_blob_client(container=container_name, blob=blob_name)
    blob_content = blob_client_input.download_blob().readall()

    # Use BytesIO to wrap the downloaded bytes; fall back to UTF-16 when the
    # default decoding fails.
    try:
        df = pd.read_csv(io.BytesIO(blob_content))
    except UnicodeDecodeError:
        df = pd.read_csv(io.BytesIO(blob_content), encoding='utf-16')

    # Parse the date columns and KEEP them as datetimes: converting back to
    # strings would make Excel store text instead of dates.
    date_columns = ('StartDate', 'LeaveDate')
    for column in date_columns:
        df[column] = pd.to_datetime(df[column], format='%d/%m/%Y')

    # Resolve the date columns by name instead of assuming fixed letters.
    column_names = list(df.columns)
    date_col_indexes = {column_names.index(column) for column in date_columns}

    output = BytesIO()
    # The context manager closes (finalizes) the workbook on exit.
    with xlsxwriter.Workbook(output, {'remove_timezone': True}) as workbook:
        worksheet = workbook.add_worksheet('Sheet1')
        date_format = workbook.add_format({'num_format': 'dd/mm/yyyy'})

        # Write column headers to the first row.
        worksheet.write_row(0, 0, column_names)

        # Write DataFrame rows starting from row 1 (below the headers).
        rows = df.itertuples(index=False, name=None)
        for row_num, row_data in enumerate(rows, start=1):
            for col_num, value in enumerate(row_data):
                if pd.isna(value):
                    # Leave missing values (NaN/NaT) as empty cells;
                    # xlsxwriter rejects NaN in write().
                    continue
                if col_num in date_col_indexes:
                    worksheet.write_datetime(
                        row_num, col_num, value.to_pydatetime(), date_format
                    )
                else:
                    worksheet.write(row_num, col_num, value)

        # Apply the date format to the whole date columns and widen them so
        # the dates are not rendered as '####'.
        for col_idx in sorted(date_col_indexes):
            worksheet.set_column(col_idx, col_idx, 12, date_format)

    # Create the output blob name with the correct extension; only a trailing
    # '.csv' is replaced so other parts of the name are left untouched.
    if blob_name.lower().endswith('.csv'):
        output_blob_name = blob_name[:-len('.csv')] + '.xlsx'
    else:
        output_blob_name = blob_name + '.xlsx'
    excel_blob_client = blob_service_client.get_blob_client(container=container_name_output, blob=output_blob_name)
    excel_blob_client.upload_blob(output.getvalue(), overwrite=True)

    # Delete the input CSV file only after the XLSX upload has succeeded.
    blob_client_input.delete_blob()

    return func.HttpResponse(f"CSV file converted to XLSX: {output_blob_name}. Input CSV file deleted.")




enter image description here

enter image description here

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.