如何使用python脚本将xml文件转换为csv

问题描述 投票:0回答:2

我有一个结构比较特殊的 XML 文件,需要用 Python 脚本将其转换为 CSV 文件。下面是我的 XML 文件的一部分:

<?xml version="1.0" encoding="UTF-8"?>
<results version="2">
    <cppcheck version="2.9"/>
    <errors>
        <error identifier="redundantAssignment" errorStyle="style" msg="Variable &apos;ret&apos; is reassigned a value before the old one has been used.">
            <location file="D:\test\main.c" line="64" column="8" info="ret is overwritten"/>
            <location file="D:\test\main.c" line="62" column="8" info="ret is assigned"/>
            <symbol>ret</symbol>
        </error>
        <error identifier="redundantAssignment" errorStyle="style" msg="Variable &apos;ret&apos; is reassigned a value before the old one has been used.">
            <location file="D:\test\data.c" line="93" column="8" info="ret is overwritten"/>
            <location file="D:\test\data.c" line="91" column="8" info="ret is assigned"/>
            <symbol>ret</symbol>
        </error>
    </errors>
</results>

我正在使用这个脚本,但它对我不起作用:

import xml.etree.ElementTree as ET
import csv

# Parse the XML report from disk.
xml = ET.parse("./error.xml")
# NOTE(review): ET.parse() returns an ElementTree, which has no
# getElementsByTagName() method (that name belongs to the xml.dom.minidom
# API), so this line raises AttributeError. ElementTree exposes the root
# element through getroot().
root = xml.getElementsByTagName()

# Create the output CSV file.
# NOTE(review): the csv module documentation recommends opening the file with
# newline='' so that no extra blank lines appear on Windows.
csvfile = open("data.csv",'w',encoding='utf-8')
csvfile_writer = csv.writer(csvfile)

# Write the header row.
csvfile_writer.writerow(["identifier","file","errorStyle","msg"])

# Visit every <error> element located under <errors>.
for error in root.findall("errors/error"):
    
    # NOTE(review): the truth value of an Element depends on whether it has
    # child elements, not on whether it exists; findall() never yields None,
    # so this check is not needed.
    if(error):
       # Collect the values for one CSV row.
       # NOTE(review): find() looks for child *elements*. In the sample XML,
       # errorStyle and msg are attributes of <error>, and file is an
       # attribute of the nested <location> elements, so the three find()
       # calls below return None and the .text accesses raise AttributeError.
      identifier = error.get('identifier')
      file = error.find('file')
      errorStyle = error.find("errorStyle")
      msg = error.find("msg")
      csv_line = [identifier, file.text, errorStyle.text, msg.text]
      
      # Append the row to the CSV file.
      csvfile_writer.writerow(csv_line)
csvfile.close()

python xml csv
2个回答
0
投票

请参考以下代码:

import xml.etree.ElementTree as ET
import csv

# Sample cppcheck report embedded as a string so the example is self-contained.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<results version="2">
    <cppcheck version="2.9"/>
    <errors>
        <error identifier="redundantAssignment" errorStyle="style" msg="Variable &apos;ret&apos; is reassigned a value before the old one has been used.">
            <location file="Din.c" line="64" column="8" info="ret is overwritten"/>
            <location file="D.c" line="62" column="8" info="ret is assigned"/>
            <symbol>ret</symbol>
        </error>
        <error identifier="redundantAssignment" errorStyle="style" msg="Variable &apos;ret&apos; is reassigned a value before the old one has been used.">
            <location file="Dta.c" line="93" column="8" info="ret is overwritten"/>
            <location file="Dta.c" line="91" column="8" info="ret is assigned"/>
            <symbol>ret</symbol>
        </error>
    </errors>
</results>"""

root = ET.fromstring(xml_data)

# newline="" lets the csv module control line endings (otherwise blank lines
# appear between rows on Windows); the with-block closes the file even if a
# row fails to write.
with open("data.csv", "w", newline="", encoding="utf-8") as csvfile:
    csvfile_writer = csv.writer(csvfile)
    csvfile_writer.writerow(["msg", "identifier", "errorStyle"])

    # Select only the <error> elements under <errors>, so unrelated siblings
    # can never be mistaken for an error record.
    for item in root.findall("./errors/error"):
        # Element.get() returns None for a missing attribute, which the csv
        # writer emits as an empty field instead of raising KeyError.
        csv_line = [item.get("msg"), item.get("identifier"), item.get("errorStyle")]
        csvfile_writer.writerow(csv_line)
        # print() is a function in Python 3; the statement form is a SyntaxError.
        print(item.attrib)

希望这有帮助,谢谢。


0
投票

注意:这并不是对原问题的直接回答,而是一个有参考价值的示例,是否适用取决于你的 XML 结构。我在使用 csv 和 pandas 模块时,很难强制把 CSV 的每个字段都输出为带引号的文本字段(结果要么没有引号,要么出现三重引号)。如果你的 XML 比较简单,只需要把属性或子元素转换为 CSV,可以使用下面这个只依赖简单文件 IO、字符串格式化和生成器表达式的方案:

import os
import xml.etree.ElementTree as ET

def items2csv(root, csv_path, attributes=('name', 'gps', 'country', 'year', 'notes')):
    """Write one text-quoted CSV row per child of *root*, read from XML attributes.

    Every field is wrapped in double quotes and fields are separated by
    ``", "``. The first line is the header built from *attributes*. Lines are
    joined with ``\\n`` and no trailing newline is written.

    Args:
        root: Parsed XML element whose direct children are exported.
        csv_path: Destination path; an existing file is overwritten.
        attributes: Attribute names to export, in column order. A child that
            lacks an attribute gets an empty field for it.
    """
    def format_row(fields):
        # Double any embedded quote so it cannot terminate the quoted field early.
        return ', '.join('"{}"'.format(str(field).replace('"', '""')) for field in fields)

    lines = [format_row(attributes)]
    for parent in root:
        # get() with a default replaces the double lookup and the "!= None" test.
        lines.append(format_row(parent.get(attrib, '') for attrib in attributes))

    # The with-block closes the file even when writing fails; an explicit
    # encoding keeps non-ASCII attribute values from depending on the locale.
    with open(csv_path, 'w', encoding='utf-8') as csvfile:
        csvfile.write('\n'.join(lines))

# Sample document: every <item> carries its data as XML attributes.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<tree>
    <item name="Name1" gps="24.227191 35.573413" country="Egypt" year="2004"></item>
    <item name="Name2" gps="24.228596 35.573733" country="Egypt" year="2004"></item>
    <item name="Name3" gps="24.253222 35.539939" country="Egypt" year="2004"></item>
    <item name="Name4" gps="25.429583 34.694408" country="Egypt" year="2007" notes="https://www.blabla.com "></item>
    <item name="Name5" gps="25.309756 34.860375" country="Egypt" year="2007"></item>
</tree>"""

# To read from a file instead: root = ET.parse('test.xml').getroot()
root = ET.fromstring(xml_data) # from variable

# Resolve the script directory to an absolute path first: when __file__ is a
# bare relative name, dirname() returns '' and plain concatenation would
# target "/test_output.csv" at the filesystem root.
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output.csv")
items2csv(root, output_path)

# Keep the console window open until the user confirms.
wait = input("Press Enter to Exit.")

下面是另一个示例,适用于数据保存在子元素中的简单 XML 结构:

#!/usr/bin/python
import os
import xml.etree.ElementTree as ET

def items2csv(root, csv_path, tags=('name', 'gps', 'country', 'year', 'notes')):
    """Write one text-quoted CSV row per child of *root*, read from sub-elements.

    Every field is wrapped in double quotes and fields are separated by
    ``", "``. The first line is the header built from *tags*. Lines are
    joined with ``\\n`` and no trailing newline is written.

    Args:
        root: Parsed XML element whose direct children are exported.
        csv_path: Destination path; an existing file is overwritten.
        tags: Names of the sub-elements to export, in column order. A child
            without a matching sub-element gets an empty field for it.
    """
    def format_row(fields):
        # Double any embedded quote so it cannot terminate the quoted field early.
        return ', '.join('"{}"'.format(str(field).replace('"', '""')) for field in fields)

    lines = [format_row(tags)]
    for parent in root:
        # findtext() with a default replaces the double lookup and the "!= None" test.
        lines.append(format_row(parent.findtext(tag, default='') for tag in tags))

    # The with-block closes the file even when writing fails; an explicit
    # encoding keeps non-ASCII element text from depending on the locale.
    with open(csv_path, 'w', encoding='utf-8') as csvfile:
        csvfile.write('\n'.join(lines))

# Sample document: every <item> carries its data in child elements.
xml_data = """<?xml version="1.0" encoding="UTF-8"?>
<tree>
    <item>
        <name>Name1</name>
        <gps>24.227191 35.573413</gps>
        <country>Egypt</country>
        <year>2004</year>
    </item>
    <item>
        <name>Name2</name>
        <gps>24.228596 35.573733</gps>
        <country>Egypt</country>
        <year>2004</year>
    </item>
    <item>
        <name>Name3</name>
        <gps>24.253222 35.539939</gps>
        <country>Egypt</country>
        <year>2004</year>
    </item>
    <item>
        <name>Name4</name>
        <gps>25.429583 34.694408</gps>
        <country>Egypt</country>
        <year>2007</year>
        <notes>https://www.blabla.com</notes>
    </item>
    <item>
        <name>Name5</name>
        <gps>25.309756 34.860375</gps>
        <country>Egypt</country>
        <year>2007</year>
    </item>
</tree>"""

# To read from a file instead: root = ET.parse('test.xml').getroot()
root = ET.fromstring(xml_data) # from variable

# Resolve the script directory to an absolute path first: when __file__ is a
# bare relative name, dirname() returns '' and plain concatenation would
# target "/test_output.csv" at the filesystem root.
output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_output.csv")
items2csv(root, output_path)

# Keep the console window open until the user confirms.
wait = input("Press Enter to Exit.")
© www.soinside.com 2019 - 2024. All rights reserved.