如何使用Python读取水晶报表.rpt文件?

问题描述 投票:0回答:1

目前,我只是想读取该文件并将其转换为 CSV 文件。但无论尝试哪种编码,我总是得到以下两种错误之一:

UnicodeDecodeError
UnicodeEncodeError

这是我的代码:

import os
import sys
import csv
import codecs

# Runs at module load: create "holder.csv" in the current working directory,
# or truncate it to zero length if it already exists. Nothing is written.
with open(file="holder.csv", mode="w") as my_empty_csv:
    ...


def convert(inputFile, outputFile):
    """Convert fixed-width text lines to CSV rows.

    The first line of ``inputFile`` is kept as the header text. The second
    line is scanned for single spaces to derive the column spans (see
    ``getFieldIndexes``); at that point the header row is written using
    those spans. Every later line is sliced with the same spans and written
    as one CSV row. The second line itself is never written.

    Args:
        inputFile: Iterable of text lines (e.g. an open text file).
        outputFile: Writable text file object handed to ``csv.writer``.
    """
    csvOut = csv.writer(outputFile)
    spans = []
    headerLine = ""

    for lineNo, line in enumerate(inputFile):
        if lineNo == 0:
            # Column spans are not known yet; hold the header back.
            headerLine = line
            continue
        if lineNo == 1:
            # The second line defines the columns; emit the header in its place.
            spans = list(getFieldIndexes(line, " "))
            line = headerLine
        csvOut.writerow(list(getFields(line, spans)))


def getFieldIndexes(input, sep):
    """Yield ``(start, end)`` spans of the segments of ``input`` split at ``sep``.

    Each element of ``input`` equal to ``sep`` closes the current span; the
    span excludes the separator itself. A final span running to
    ``len(input)`` is always yielded, so an empty input yields ``(0, 0)``.

    Args:
        input: Sequence to scan (typically one line of text).
        sep: Value each element is compared against (typically one character).

    Yields:
        Tuples ``(start, end)`` usable as slice bounds into ``input``.
    """
    start = 0
    for pos, ch in enumerate(input):
        if ch != sep:
            continue
        yield (start, pos)
        start = pos + 1
    yield (start, len(input))


def getFields(input, indexes):
    """Yield the stripped slice of ``input`` for each span in ``indexes``.

    Args:
        input: Text to slice.
        indexes: Iterable of spans; element 0 is the slice start and
            element 1 is the slice end.

    Yields:
        ``input[start:end]`` with leading and trailing whitespace removed.
    """
    for span in indexes:
        begin, end = span[0], span[1]
        field = input[begin:end]
        yield field.strip()


if __name__ == '__main__':
    # Command-line entry point. Expects two arguments: the input file path
    # and an output prefix. For every candidate codec the input is decoded
    # with that codec and converted to "<prefix>-<codec>.csv".
    if (len(sys.argv) == 3):
        # Candidate input codecs, tried one after another.
        encodings = ['ascii',
                     'big5',
                     'big5hkscs',
                     'cp037',
                     'cp273',
                     'cp424',
                     'cp437',
                     'cp500',
                     'cp720',
                     'cp737',
                     'cp775',
                     'cp850',
                     'cp852',
                     'cp855',
                     'cp856',
                     'cp857',
                     'cp858',
                     'cp860',
                     'cp861',
                     'cp862',
                     'cp863',
                     'cp864',
                     'cp865',
                     'cp866',
                     'cp869',
                     'cp874',
                     'cp875',
                     'cp932',
                     'cp949',
                     'cp950',
                     'cp1006',
                     'cp1026',
                     'cp1125',
                     'cp1140',
                     'cp1250',
                     'cp1251',
                     'cp1252',
                     'cp1253',
                     'cp1254',
                     'cp1255',
                     'cp1256',
                     'cp1257',
                     'cp1258',
                     'euc_jp',
                     'euc_jis_2004',
                     'euc_jisx0213',
                     'euc_kr',
                     'gb2312',
                     'gbk',
                     'gb18030',
                     'hz',
                     'iso2022_jp',
                     'iso2022_jp_1',
                     'iso2022_jp_2',
                     'iso2022_jp_2004',
                     'iso2022_jp_3',
                     'iso2022_jp_ext',
                     'iso2022_kr',
                     'latin_1',
                     'iso8859_2',
                     'iso8859_3',
                     'iso8859_4',
                     'iso8859_5',
                     'iso8859_6',
                     'iso8859_7',
                     'iso8859_8',
                     'iso8859_9',
                     'iso8859_10',
                     'iso8859_11',
                     'iso8859_13',
                     'iso8859_14',
                     'iso8859_15',
                     'iso8859_16',
                     'johab',
                     'koi8_r',
                     'koi8_t',
                     'koi8_u',
                     'kz1048',
                     'mac_cyrillic',
                     'mac_greek',
                     'mac_iceland',
                     'mac_latin2',
                     'mac_roman',
                     'mac_turkish',
                     'ptcp154',
                     'shift_jis',
                     'shift_jis_2004',
                     'shift_jisx0213',
                     'utf_32',
                     'utf_32_be',
                     'utf_32_le',
                     'utf_16',
                     'utf_16_be',
                     'utf_16_le',
                     'utf_7',
                     'utf_8',
                     'utf_8_sig']
        for encoding in encodings:
            outputPath = f"{sys.argv[2]}-{encoding}.csv"
            try:
                with open(sys.argv[1], encoding=encoding) as inputFile:
                    # Write the CSV as UTF-8 explicitly. Without an explicit
                    # encoding the locale's default codec is used, and it may
                    # be unable to represent the decoded characters
                    # (UnicodeEncodeError on write).
                    with open(outputPath, 'w', newline='',
                              encoding='utf-8') as outputFile:
                        convert(inputFile, outputFile)
            except UnicodeError as e:
                # UnicodeError is the common base of UnicodeDecodeError and
                # UnicodeEncodeError. Catching the base also covers the plain
                # UnicodeError that the BOM-detecting utf_16 / utf_32 stream
                # decoders raise when the input has no byte-order mark.
                # Both files are already closed here (the with-blocks have
                # exited), so the partial output can be removed safely.
                if os.path.exists(outputPath):
                    os.remove(outputPath)
                print(e.__traceback__.tb_lineno)
                print(f"Not {encoding}")
            else:
                # NOTE: this only means no codec error was raised. Codecs
                # such as latin_1 map every possible byte value, so they
                # never fail even when the decoded text is meaningless.
                print(f"RIGHT ONE {encoding}")

    else:
        print("Usage: rpt2csv.py inputFile outputFile")

我尝试遍历了 Python 支持的所有编码,可惜都没有成功。有没有办法用 Python 或者其他语言做到这一点?预先感谢您!

python encoding crystal-reports
1个回答
0
投票

Crystal 的 .rpt 文件是一种专有文件格式,不要指望能把它当作文本文件直接读取。

您或许应该改用 Crystal 运行时来运行报表,并将其导出为 CSV。

最新的 Crystal 运行时基于 .NET(Crystal Reports for Visual Studio)。由于您使用的是 Python,可以考虑把这一步交给第三方的 Crystal Reports 查看器或自动化工具来完成。其中一些工具提供完整的命令行接口,允许您的 Python 代码指定 rpt 文件、登录信息、参数值、导出格式、导出文件名等。

Ken Hamady 在此处维护着第 3 方 Crystal Reports 实用程序的列表。

© www.soinside.com 2019 - 2024. All rights reserved.