目前,我只是想读取这个 .rpt 文件并将其转换为 csv 文件。但无论我尝试哪种编码,总是会得到 UnicodeDecodeError 或 UnicodeEncodeError。
这是我的代码:
import os
import sys
import csv
import codecs
# Create (or truncate) an empty "holder.csv" in the current working directory.
# Nothing is written; the context manager only guarantees the handle is closed.
with open("holder.csv", mode="w") as my_empty_csv:
    pass
def convert(inputFile, outputFile):
    """Copy a column-aligned text report into ``outputFile`` as CSV rows.

    The first line of ``inputFile`` supplies the column headers.  The second
    line is scanned for single spaces to find the column boundaries
    (presumably a ruler line of dashes -- confirm against real input); it is
    not written to the output itself.  Every later line is cut at those same
    boundaries and written as one CSV row.

    Nothing is written when the input has fewer than two lines, because the
    column boundaries are unknown until the second line has been read.

    Args:
        inputFile: Iterable of text lines, e.g. an open text-mode file.
        outputFile: Writable text file object handed to ``csv.writer``.
    """
    csv_out = csv.writer(outputFile)
    lines = iter(inputFile)

    header_line = next(lines, None)
    if header_line is None:
        return

    boundary_line = next(lines, None)
    if boundary_line is None:
        return

    # The header row can only be emitted once the column spans are known.
    spans = list(getFieldIndexes(boundary_line, " "))
    csv_out.writerow(list(getFields(header_line, spans)))

    for line in lines:
        csv_out.writerow(list(getFields(line, spans)))
def getFieldIndexes(input, sep):
    """Yield ``(start, end)`` index pairs for the pieces of ``input`` between separators.

    Each element of ``input`` equal to ``sep`` closes the current piece; the
    separator itself belongs to no piece.  A final pair running to
    ``len(input)`` is always yielded, so an empty ``input`` yields ``(0, 0)``.
    Adjacent separators produce empty pieces such as ``(2, 2)``.

    Args:
        input: Sequence to scan, typically one line of text.
        sep: Value compared against each element, typically a single character.
    """
    start = 0
    for pos, ch in enumerate(input):
        if ch != sep:
            continue
        yield (start, pos)
        start = pos + 1
    yield (start, len(input))
def getFields(input, indexes):
    """Yield the whitespace-stripped slice of ``input`` for each index pair.

    Args:
        input: Text line to cut into fields.
        indexes: Iterable of pairs whose first two items are the slice start
            and end, as produced by ``getFieldIndexes``.
    """
    for span in indexes:
        begin, end = span[0], span[1]
        field = input[begin:end]
        yield field.strip()
if __name__ == '__main__':
    # Brute-force probe: try to decode the input file with every codec listed
    # below and write one "<outputFile>-<encoding>.csv" per codec that gets
    # through convert() without a Unicode error.
    if len(sys.argv) == 3:
        encodings = [
            'ascii', 'big5', 'big5hkscs',
            'cp037', 'cp273', 'cp424', 'cp437', 'cp500',
            'cp720', 'cp737', 'cp775',
            'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp858',
            'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866',
            'cp869', 'cp874', 'cp875',
            'cp932', 'cp949', 'cp950',
            'cp1006', 'cp1026', 'cp1125', 'cp1140',
            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254',
            'cp1255', 'cp1256', 'cp1257', 'cp1258',
            'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr',
            'gb2312', 'gbk', 'gb18030', 'hz',
            'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
            'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr',
            'latin_1',
            'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6',
            'iso8859_7', 'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11',
            'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16',
            'johab', 'koi8_r', 'koi8_t', 'koi8_u', 'kz1048',
            'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2',
            'mac_roman', 'mac_turkish',
            'ptcp154',
            'shift_jis', 'shift_jis_2004', 'shift_jisx0213',
            'utf_32', 'utf_32_be', 'utf_32_le',
            'utf_16', 'utf_16_be', 'utf_16_le',
            'utf_7', 'utf_8', 'utf_8_sig',
        ]
        for encoding in encodings:
            candidate = f"{sys.argv[2]}-{encoding}.csv"
            try:
                # Both handles are closed by the ``with`` before the handler
                # below runs, so the partial output can be deleted safely.
                # NOTE(review): the output is opened with the platform default
                # encoding, so a UnicodeEncodeError here may reflect the
                # locale's codec rather than a wrong input encoding.
                with open(sys.argv[1], encoding=encoding) as inputFile, \
                        open(candidate, 'w', newline='') as outputFile:
                    convert(inputFile, outputFile)
            except UnicodeError as e:
                # UnicodeError is the common base of UnicodeDecodeError and
                # UnicodeEncodeError.  Catching the base also covers the plain
                # UnicodeError that the BOM-requiring utf_16 / utf_32 codecs
                # raise, which would otherwise abort the whole probe loop and
                # leave a partial file behind.
                os.remove(candidate)
                print(e.__traceback__.tb_lineno)
                print(f"Not {encoding}")
            else:
                # Only means "decoded and re-encoded without error"; single-byte
                # codecs such as latin_1 accept any byte sequence.
                print(f"RIGHT ONE {encoding}")
    else:
        print("Usage: rpt2csv.py inputFile outputFile")
我尝试遍历了 Python 支持的所有编码,可惜都没有成功。请问有没有办法用 Python 或其他语言做到这一点?先谢谢了!
Crystal 的 .rpt 文件是一种专有文件格式,无法像普通文本文件那样直接读取。
也许您应该尝试做的是使用 Crystal 运行时来运行报告并将其导出为 CSV。
最新的 Crystal 运行时使用 .NET(Crystal Reports for Visual Studio)。由于您使用的是 Python,因此请考虑将该步骤委托给第 3 方 Crystal Reports 查看器或自动化工具。其中一些实用程序具有完整的命令行 API,允许您的 Python 代码指定 rpt、登录信息、参数值、导出格式、导出文件名等。
Ken Hamady 在此处维护着第 3 方 Crystal Reports 实用程序的列表。