我有一个结构比较特殊的 xml 文件,需要用 Python 脚本将其转换为 csv 文件。下面是我的 xml 文件的一部分:
<?xml version="1.0" encoding="UTF-8"?>
<results version="2">
<cppcheck version="2.9"/>
<errors>
<error identifier="redundantAssignment" errorStyle="style" msg="Variable 'ret' is reassigned a value before the old one has been used.">
<location file="D:\test\main.c" line="64" column="8" info="ret is overwritten"/>
<location file="D:\test\main.c" line="62" column="8" info="ret is assigned"/>
<symbol>ret</symbol>
</error>
<error identifier="redundantAssignment" errorStyle="style" msg="Variable 'ret' is reassigned a value before the old one has been used.">
<location file="D:\test\data.c" line="93" column="8" info="ret is overwritten"/>
<location file="D:\test\data.c" line="91" column="8" info="ret is assigned"/>
<symbol>ret</symbol>
</error>
</errors>
</results>
我正在使用这个脚本,但它对我不起作用:
import xml.etree.ElementTree as ET
import csv
# PARSE XML
# NOTE(review): ET.parse() returns an ElementTree object. getElementsByTagName
# belongs to the xml.dom / minidom API, not to ElementTree, so the second line
# below is expected to raise AttributeError; ET.parse(...).getroot() is the
# usual way to obtain the root element.
xml = ET.parse("./error.xml")
root = xml.getElementsByTagName()
# CREATE CSV FILE
csvfile = open("data.csv",'w',encoding='utf-8')
csvfile_writer = csv.writer(csvfile)
# ADD THE HEADER TO CSV FILE
csvfile_writer.writerow(["identifier","file","errorStyle","msg"])
# FOR EACH <error> ELEMENT UNDER <errors>
for error in root.findall("errors/error"):
if(error):
# EXTRACT ERROR DETAILS
# NOTE(review): in the sample XML, errorStyle and msg are attributes of
# <error>, and file is an attribute of its <location> children. find() looks
# up child elements, so the three find() calls below would return None and
# the .text accesses would fail; error.get(...) / location.get('file') is
# presumably what was intended.
identifier = error.get('identifier')
file = error.find('file')
errorStyle = error.find("errorStyle")
msg = error.find("msg")
csv_line = [identifier, file.text, errorStyle.text, msg.text]
# ADD A NEW ROW TO CSV FILE
csvfile_writer.writerow(csv_line)
csvfile.close()
请参考以下代码:
import xml.etree.ElementTree as ET
import csv
# Sample cppcheck report used as input for the conversion below.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<results version="2">
<cppcheck version="2.9"/>
<errors>
<error identifier="redundantAssignment" errorStyle="style" msg="Variable 'ret' is reassigned a value before the old one has been used.">
<location file="Din.c" line="64" column="8" info="ret is overwritten"/>
<location file="D.c" line="62" column="8" info="ret is assigned"/>
<symbol>ret</symbol>
</error>
<error identifier="redundantAssignment" errorStyle="style" msg="Variable 'ret' is reassigned a value before the old one has been used.">
<location file="Dta.c" line="93" column="8" info="ret is overwritten"/>
<location file="Dta.c" line="91" column="8" info="ret is assigned"/>
<symbol>ret</symbol>
</error>
</errors>
</results>"""

root = ET.fromstring(xml_data)

# newline='' hands line-ending control to the csv module (avoids blank rows
# on Windows); the with-block closes the file even if a row fails to write.
with open("data.csv", 'w', newline='', encoding='utf-8') as csvfile:
    csvfile_writer = csv.writer(csvfile)
    csvfile_writer.writerow(["msg", "identifier", "errorStyle"])
    # Visit only the <error> elements under <errors>; other children of the
    # root (e.g. <cppcheck>) do not carry these attributes.
    for item in root.iterfind("errors/error"):
        csv_line = [item.attrib["msg"], item.attrib["identifier"], item.attrib["errorStyle"]]
        csvfile_writer.writerow(csv_line)
        print(item.attrib)
希望这有帮助,谢谢。
注意:这不是对原始问题的直接回答,而是一个可能有参考价值的示例,是否适用取决于 xml 的结构:我在使用 csv 和 pandas 模块强制把 csv 输出为文本字段时遇到了麻烦(结果要么没有引号,要么是三重引号)。针对结构相当简单、只需把属性或子元素转换为 csv 的 xml,我想出了一个只用到简单文件 IO、字符串格式化和生成器表达式的解决方案:
import os
import xml.etree.ElementTree as ET
def items2csv(root, csv_path):
    """Write selected attributes of each child of *root* to a quoted CSV file.

    Every field is wrapped in double quotes, fields are separated by ", ",
    and rows are separated by a newline with no trailing newline after the
    last row. The first row holds the column names.

    Args:
        root: Parent element whose direct children are exported, one row each.
        csv_path: Path of the file to create or overwrite.
    """
    attributes = ['name', 'gps', 'country', 'year', 'notes']

    def quoted_row(values):
        # Double any embedded quote so a value such as 5" pipe cannot
        # terminate its field early and corrupt the row.
        return ', '.join('"{}"'.format(value.replace('"', '""')) for value in values)

    # UTF-8 keeps non-ASCII attribute values writable regardless of the
    # platform's default encoding; the with-block closes the file on error.
    with open(csv_path, 'w', encoding='utf-8') as csvfile:
        # Column headers
        csvfile.write(quoted_row(attributes))
        for parent in root:
            # A missing attribute becomes an empty field.
            csvfile.write('\n' + quoted_row(parent.get(attrib, '') for attrib in attributes))
# Sample input: one <item> per row, with the data stored in attributes.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<tree>
<item name="Name1" gps="24.227191 35.573413" country="Egypt" year="2004"></item>
<item name="Name2" gps="24.228596 35.573733" country="Egypt" year="2004"></item>
<item name="Name3" gps="24.253222 35.539939" country="Egypt" year="2004"></item>
<item name="Name4" gps="25.429583 34.694408" country="Egypt" year="2007" notes="https://www.blabla.com "></item>
<item name="Name5" gps="25.309756 34.860375" country="Egypt" year="2007"></item>
</tree>"""

# To read from a file instead: root = ET.parse('test.xml').getroot()
root = ET.fromstring(xml_data) # from variable

# Write the CSV next to this script. abspath() matters because __file__ can be
# a bare relative name (e.g. `python script.py` on Python < 3.9); its dirname
# would then be '' and plain concatenation would target "/test_output.csv".
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output.csv")
items2csv(root, output_path)

# Keep the console window open until the user confirms.
input("Press Enter to Exit.")
以及基于子元素的简单 xml 结构的示例:
#!/usr/bin/python
import os
import xml.etree.ElementTree as ET
def items2csv(root, csv_path):
    """Write the text of selected sub-elements of each child of *root* to a quoted CSV file.

    Every field is wrapped in double quotes, fields are separated by ", ",
    and rows are separated by a newline with no trailing newline after the
    last row. The first row holds the column names.

    Args:
        root: Parent element whose direct children are exported, one row each.
        csv_path: Path of the file to create or overwrite.
    """
    tags = ['name', 'gps', 'country', 'year', 'notes']

    def quoted_row(values):
        # Double any embedded quote so a value such as 5" pipe cannot
        # terminate its field early and corrupt the row.
        return ', '.join('"{}"'.format(value.replace('"', '""')) for value in values)

    # UTF-8 keeps non-ASCII element text writable regardless of the
    # platform's default encoding; the with-block closes the file on error.
    with open(csv_path, 'w', encoding='utf-8') as csvfile:
        # Column headers
        csvfile.write(quoted_row(tags))
        for parent in root:
            # A missing sub-element becomes an empty field.
            csvfile.write('\n' + quoted_row(parent.findtext(tag, '') for tag in tags))
# Sample input: one <item> per row, with the data stored in sub-elements.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<tree>
<item>
<name>Name1</name>
<gps>24.227191 35.573413</gps>
<country>Egypt</country>
<year>2004</year>
</item>
<item>
<name>Name2</name>
<gps>24.228596 35.573733</gps>
<country>Egypt</country>
<year>2004</year>
</item>
<item>
<name>Name3</name>
<gps>24.253222 35.539939</gps>
<country>Egypt</country>
<year>2004</year>
</item>
<item>
<name>Name4</name>
<gps>25.429583 34.694408</gps>
<country>Egypt</country>
<year>2007</year>
<notes>https://www.blabla.com</notes>
</item>
<item>
<name>Name5</name>
<gps>25.309756 34.860375</gps>
<country>Egypt</country>
<year>2007</year>
</item>
</tree>"""

# To read from a file instead: root = ET.parse('test.xml').getroot()
root = ET.fromstring(xml_data) # from variable

# Write the CSV next to this script. abspath() matters because __file__ can be
# a bare relative name (e.g. `python script.py` on Python < 3.9); its dirname
# would then be '' and plain concatenation would target "/test_output.csv".
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output.csv")
items2csv(root, output_path)

# Keep the console window open until the user confirms.
input("Press Enter to Exit.")