我有下面这个 txt 文件,想用一种不依赖行号的文本解析方法,把它输出为 2 个字典 txt 文件,因为这种方法还需要适用于更大的 txt 文件。期望的两个输出如下:
"Height1" : { "Fir_ColumnB" : 123.50, "Fir_ColumnC" : 4, "Fir_ColumnD" : 31}, "Height2" : { "Fir_ColumnB" : 2334.00, "Fir_ColumnC" : 62, "Fir_ColumnD" : 0}, "Height3" : {.....}
{"Row1" : "12251m", "Row2" : "3231m","Row3" : "31412m"}
但是由于那些接连出现的“****”分隔行,我遇到了一些困难。下面是输入文件的内容,以及我目前的代码:
creating_testing_1
TESTING_1 ;
TESTING_1 ;
TESTING_1 ; First Chapter ed
TESTING_1 ; ********************************************************
TESTING_1 ; Fir_ColumnA Fir_ColumnB Fir_ColumnC Fir_ColumnD
TESTING_1 ; ********************************************************
TESTING_1 ; Height1 123.50 4 31
TESTING_1 ; Height2 2334.00 62 0
TESTING_1 ; Height3 0.00 23 23
TESTING_1 ; ********************************************************
TESTING_1 ;
TESTING_1 ; Second Chapter ed
TESTING_1 ; ********************************************************
TESTING_1 ; Sec_ColumnA Sec_ColumnB
TESTING_1 ; ********************************************************
TESTING_1 ; Row1 12251m
TESTING_1 ; Row2 3231m
TESTING_1 ; Row3 31412m
TESTING_1 ; ********************************************************
TESTING_1 ;
TESTING_1 ; Ending...
TESTING_1 ;
def parse_txt_file(filename):
    """Parse a chaptered ``PREFIX ; payload`` report into per-chapter dicts.

    Each record is expected to look like ``TESTING_1 ; payload``; only the
    text after the first ``;`` is interpreted (a line without ``;`` is used
    whole).  A chapter starts at a line containing its title and is framed
    by three asterisk rules: the column header sits between the first and
    second rule, the data rows between the second and third.  Rows are
    located by counting those rules, never by line number, so the table
    length is irrelevant.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict with one entry per chapter title encountered:
        ``'First Chapter ed'`` maps each row label to a dict holding
        ``Fir_ColumnB`` (float), ``Fir_ColumnC`` (int) and ``Fir_ColumnD``
        (int); ``'Second Chapter ed'`` maps each row label to its single
        value as a string.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a first-chapter data row holds a non-numeric value.
    """
    # Number of whitespace-separated fields a data row must have.
    expected_columns = {'First Chapter ed': 4, 'Second Chapter ed': 2}
    data = {}
    current_chapter = None
    rules_seen = 0  # asterisk rules met since the current chapter title

    with open(filename, 'r') as file:
        for raw_line in file:
            # Strip the "TESTING_1 ;" style prefix so the payload can be
            # inspected directly.
            prefix, separator, payload = raw_line.partition(';')
            line = (payload if separator else prefix).strip()
            if not line:
                continue

            title = next((t for t in expected_columns if t in line), None)
            if title is not None:
                current_chapter = title
                data[current_chapter] = {}
                rules_seen = 0
                continue
            if 'Ending...' in line:
                break
            if current_chapter is None:
                continue

            if line.startswith('*'):
                rules_seen += 1
                if rules_seen == 3:
                    # The third rule closes the chapter's table.
                    current_chapter = None
                continue
            if rules_seen != 2:
                # Between the first and second rule: the column header row.
                continue

            columns = line.split()
            if len(columns) != expected_columns[current_chapter]:
                continue
            key, *values = columns
            if current_chapter == 'First Chapter ed':
                data[current_chapter][key] = {
                    "Fir_ColumnB": float(values[0]),
                    "Fir_ColumnC": int(values[1]),
                    "Fir_ColumnD": int(values[2]),
                }
            else:
                data[current_chapter][key] = values[0]
    return data
def save_dictionary_to_txt(data, filename):
    """Write every chapter of *data* to *filename* as plain text.

    For each chapter the title is written on its own line, followed by one
    ``"key" : value`` line per entry and then a blank line.  Dict values are
    written using their Python representation; any other value is wrapped
    in double quotes.

    Args:
        data: Mapping of chapter title to a mapping of row key to value.
        filename: Path of the text file to create or overwrite.
    """
    def render(name, entry):
        # Nested dictionaries are emitted bare, scalars are quoted.
        if isinstance(entry, dict):
            return f'"{name}" : {entry}\n'
        return f'"{name}" : "{entry}"\n'

    with open(filename, 'w') as out:
        for title, rows in data.items():
            out.write(f"{title}\n")
            out.writelines(render(name, entry) for name, entry in rows.items())
            out.write('\n')
def main(filename='short_test_2.txt'):
    """Parse *filename* and write each chapter to its own text file.

    The output file name is derived from the chapter title: spaces become
    underscores, the result is lower-cased and ``.txt`` is appended.  The
    name is relative, so files land in the current working directory.

    Args:
        filename: Path of the report to parse.  Defaults to the sample file
            name so a call without arguments keeps working.
    """
    data = parse_txt_file(filename)
    for chapter, chapter_data in data.items():
        output_name = f'{chapter.replace(" ", "_").lower()}.txt'
        save_dictionary_to_txt({chapter: chapter_data}, output_name)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
解决这个问题的关键在于认识到:每一组数据都由三行星号界定(标题之后一行、列名之后一行、数据之后一行)。一旦确定了所有这些分组的位置,其余的处理就水到渠成了。
我不太确定您期望的确切输出格式,但您应该可以在下面这段代码的基础上进一步解决问题:
import json  # used only for presentation of the dictionaries
def convert(s):
    """Return *s* as an ``int`` if possible, else a ``float``, else unchanged.

    Each conversion is attempted in turn; a ``ValueError`` moves on to the
    next one, and *s* itself is returned when neither applies.
    """
    for cast in (int, float):
        try:
            return cast(s)
        except ValueError:
            continue
    return s
def segments(content):
    """Yield the positions of asterisk lines in groups of three.

    Walks *content* in order and records the index of every item that
    starts with ``"*"``.  Each time three indices have been collected they
    are yielded as a list and collection starts over; a trailing group of
    fewer than three is never yielded.
    """
    rule_positions = []
    for position, text in enumerate(content):
        if not text.startswith("*"):
            continue
        rule_positions.append(position)
        if len(rule_positions) < 3:
            continue
        yield rule_positions
        rule_positions = []
def normalise(content):
    """Return the non-empty text found after the first ``;`` of each row.

    Every row is split on ``;``.  Rows without a ``;`` are dropped; for the
    others the second field is stripped of surrounding whitespace and kept
    only when something remains.  Anything after a second ``;`` is ignored.
    """
    split_rows = (row.split(";") for row in content)
    payloads = (fields[1].strip() for fields in split_rows if len(fields) > 1)
    return [payload for payload in payloads if payload]
# Load the report, keeping only the text that follows each line's ';'.
with open("foo.txt") as data:
    content = normalise(data.readlines())

# Build one dictionary per table: its title plus a list of values per column.
results = []
for first_rule, second_rule, last_rule in segments(content):
    # The title is the line just before the first rule; the column names
    # are on the line just after it.
    td = {"title": content[first_rule - 1]}
    column_names = content[first_rule + 1].split()
    # Data rows lie strictly between the second and the third rule.
    for row in content[second_rule + 1:last_rule]:
        for name, cell in zip(column_names, row.split()):
            td.setdefault(name, []).append(convert(cell))
    results.append(td)

print(json.dumps(results, indent=2))
输出:
[
{
"title": "First Chapter ed",
"Fir_ColumnA": [
"Height1",
"Height2",
"Height3"
],
"Fir_ColumnB": [
123.5,
2334.0,
0.0
],
"Fir_ColumnC": [
4,
62,
23
],
"Fir_ColumnD": [
31,
0,
23
]
},
{
"title": "Second Chapter ed",
"Sec_ColumnA": [
"Row1",
"Row2",
"Row3"
],
"Sec_ColumnB": [
"12251m",
"3231m",
"31412m"
]
}
]