该代码的目的是从xml文件中获取元素并将其解析为csv文件。
最近我已经更新了代码,所以代码转到了[[Phones(ProjectData的另一个子代),它从Set中获取元素,并且Get将它们与下划线附加在一起,并将其解析为第一列标题名称为Identify这是我的代码
from xml.etree import ElementTree as ET
from collections import defaultdict
from pathlib import Path
import csv
from pathlib import Path
directory = 'C:/Users/TRY/try.xml'
with open('try.csv', 'w', newline='') as f:
writer = csv.writer(f, delimiter=';')
#◙ writer = csv.writer(f)
headers = ['identify','id', 'service_code', 'rational', 'qualify', 'description_num', 'description_txt','Counter', 'set_data_xin', 'set_data_xax', 'set_data_value', 'set_data_x']
writer.writerow(headers)
xml_files_list = list(map(str,Path(directory).glob('**/*.xml')))
for xml_file in xml_files_list:
tree = ET.parse(xml_file)
root = tree.getroot()
p_get = tree.find('.//Phones/Get').text
p_set = tree.find('.//Phones/Set').text
start_nodes = root.findall('.//START')
for sn in start_nodes:
row = defaultdict(str)
# <<<<< Indentation was wrong here
for k,v in sn.attrib.items():
row[k] = v
for rn in sn.findall('.//Rational'):
row['Rational'] = rn.text
for qu in sn.findall('.//Qualify'):
row['Qualify'] = qu.text
for ds in sn.findall('.//Description'):
row['Description_txt'] = ds.text
row['Description_text_id'] = ds.attrib['text_id']
for counter, st in enumerate( sn.findall('.//SetData') ):
for k,v in st.attrib.items():
if v.startswith("-"):
v = v.replace("-","",1)
v=v.replace(',', '.')
row['SetData_'+ str(k)] = v
row["Counter"] = counter
row_data = [row[i] for i in headers]
row_data[0]=p_get + '_' + p_set
writer.writerow(row_data)
row = defaultdict(str)
这是xml文件
<?xml version="1.0" encoding="utf-8"?> <ProjectData> <Phones> <Date /> <Prog /> <Box /> <Feature /> <IN>MAFWDS</IN> <Set>234234</Set> <Pr>23423</Pr> <Number>afasfhrtv</Number> <Simple>dfasd</Simple> <Nr /> <Get>6070106091</Get> <Reno>1233</Reno> </Phones> <FINAL> <START id="B001" service_code="0x5196"> <Docs Docs_type="START"> <Rational>225196</Rational> <Qualify>6251960000A0DE</Qualify> </Docs> <Description num="1213f2312">The parameter</Description> <DataFile dg="12" dg_id="let"> <SetData value="32" /> </DataFile> </START> <START id="C003" service_code="0x517B"> <Docs Docs_type="START"> <Rational>23423</Rational> <Qualify>342342</Qualify> </Docs> <Description num="3423423f3423">The third</Description> <DataFile dg="55" dg_id="big"> <SetData x="E1" value="21259" /> <SetData x="E2" value="02" /> </DataFile> </START> <START id="Z048" service_code="0x5198"> <RawData rawdata_type="ASDS"> <Rational>225198</Rational> <Qualify>3432433</Qualify> </RawData> <Description num="434234234">The forth</Description> <DataFile unit="21" unit_id="FEDS"> <FileX unit="eg" discrete="false" axis_pts="19" name="Vsome" text_id="bx5" unit_id="GDFSD" /> <SetData xin="5" xax="233" value="323" /> <SetData xin="123" xax="77" value="555" /> <SetData xin="17" xax="65" value="23" /> </DataFile> </START> </FINAL> </ProjectData>
输出看起来像这样,应该看起来像。被重复。我想要,所以不再重复。我尝试使用该代码,并在代码中移动此变量,但没有成功,由于列是由xml文件的另一个子元素组成的,因此很难解决此问题。问题是列[[identify
row
之前直接在for counter, st in enumerate( sn.findall('.//SetData') ):
中进行设置>例如:
row['identify']=p_get + '_' + p_set
for counter, st in enumerate( sn.findall('.//SetData') ):
for k,v in st.attrib.items():
if v.startswith("-"):
v = v.replace("-","",1)
v=v.replace(',', '.')
row['SetData_'+ str(k)] = v
row["Counter"] = counter
row_data = [row[i] for i in headers]
writer.writerow(row_data)
row = defaultdict(str)