我想转换此 XML:
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:bsvc="urn:com.workday/bsvc">
<soapenv:Header>
<bsvc:Workday_Common_Header>
<!--Optional:-->
<bsvc:Include_Reference_Descriptors_In_Response>?</bsvc:Include_Reference_Descriptors_In_Response>
</bsvc:Workday_Common_Header>
</soapenv:Header>
<soapenv:Body>
<bsvc:Gender_Data bsvc:version="?">
<!--Optional:-->
<bsvc:Country_Reference bsvc:Descriptor="?">
<!--Zero or more repetitions:-->
<bsvc:ID bsvc:type="?">?</bsvc:ID>
</bsvc:Country_Reference>
<!--You have a CHOICE of the next 2 items at this level-->
<!--Optional:-->
<bsvc:Is_Female>?</bsvc:Is_Female>
<!--Optional:-->
<bsvc:Is_Male>?</bsvc:Is_Male>
<bsvc:Gender_Description>?</bsvc:Gender_Description>
<bsvc:Gender_Code>?</bsvc:Gender_Code>
<!--Optional:-->
<bsvc:Inactive>?</bsvc:Inactive>
</bsvc:Gender_Data>
</soapenv:Body>
</soapenv:Envelope>
转换为:
<bsvc:Gender_Data bsvc:version="?" xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:bsvc="urn:com.workday/bsvc">
<bsvc:Country_Reference bsvc:Descriptor="?">
<bsvc:ID bsvc:type="?">?</bsvc:ID>
</bsvc:Country_Reference>
<bsvc:Is_Female>?</bsvc:Is_Female>
<bsvc:Is_Male>?</bsvc:Is_Male>
<bsvc:Gender_Description>?</bsvc:Gender_Description>
<bsvc:Gender_Code>?</bsvc:Gender_Code>
<bsvc:Inactive>?</bsvc:Inactive>
</bsvc:Gender_Data>
基本上就是删除前 8 行和后 2 行,删除所有注释节点,并将命名空间声明
xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"
和 xmlns:bsvc="urn:com.workday/bsvc"
附加到第一个节点/根节点。
我尝试了这段代码,但遇到了前缀问题。
import os
import xml.etree.ElementTree as ET
# Directory that holds the SOAP request XML files to rewrite in place.
directory_path = '/Users/File_Path/'

# Prefix -> namespace URI mappings used by the SOAP envelopes.
namespaces = {
    'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
    'bsvc': 'urn:com.workday/bsvc'
}


def _uses_namespace(element, uri):
    """Return True if any tag or attribute name under *element* is in *uri*."""
    marker = '{' + uri + '}'
    for node in element.iter():
        if isinstance(node.tag, str) and node.tag.startswith(marker):
            return True
        if any(key.startswith(marker) for key in node.keys()):
            return True
    return False


def extract_gender_data(xml_text):
    """Return the ``bsvc:Gender_Data`` element of a SOAP envelope as XML text.

    The whole document is parsed (instead of slicing lines off the text) so
    the namespace declarations on the envelope are still in effect when the
    prefixed elements are read. The default parser discards comments, so no
    separate comment-removal pass is needed.

    Args:
        xml_text: The XML document as ``str`` or ``bytes``. A document whose
            root already is ``bsvc:Gender_Data`` is accepted as well, which
            makes re-running the script on processed files harmless.

    Returns:
        The serialized ``bsvc:Gender_Data`` element (no XML declaration) with
        both the ``bsvc`` and ``soapenv`` namespace declarations on it.

    Raises:
        xml.etree.ElementTree.ParseError: If *xml_text* is not well-formed.
        ValueError: If no ``bsvc:Gender_Data`` element is present.
    """
    # Without registration ElementTree would emit ns0:/ns1: prefixes.
    for prefix, uri in namespaces.items():
        ET.register_namespace(prefix, uri)

    envelope = ET.fromstring(xml_text)
    target_tag = '{' + namespaces['bsvc'] + '}Gender_Data'
    if envelope.tag == target_tag:
        gender_data = envelope
    else:
        gender_data = envelope.find('.//bsvc:Gender_Data', namespaces)
    if gender_data is None:
        raise ValueError('no bsvc:Gender_Data element found')

    # Dropped comments leave blank lines behind; re-indent when available
    # (ET.indent exists from Python 3.9 on).
    if hasattr(ET, 'indent'):
        ET.indent(gender_data)
    # The whitespace that followed the element inside <soapenv:Body> must not
    # trail the new document root.
    gender_data.tail = None

    # ElementTree only declares namespaces that are actually used. Declare
    # the remaining ones by hand, but never one the serializer will already
    # write, because a duplicated attribute would make the output malformed.
    for prefix, uri in namespaces.items():
        if not _uses_namespace(gender_data, uri):
            gender_data.set('xmlns:' + prefix, uri)

    return ET.tostring(gender_data, encoding='unicode')


def main():
    """Rewrite every ``.xml`` file in ``directory_path`` in place."""
    for filename in os.listdir(directory_path):
        if not filename.endswith('.xml'):
            continue
        file_path = os.path.join(directory_path, filename)
        # Read bytes so the parser can honour an encoding named in the
        # XML declaration.
        with open(file_path, 'rb') as file:
            xml_bytes = file.read()
        try:
            modified_xml = extract_gender_data(xml_bytes)
        except (ET.ParseError, ValueError) as exc:
            # Leave files that cannot be converted untouched.
            print(f"Skipped {file_path}: {exc}")
            continue
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(modified_xml)
        print(f"Processed and overwritten: {file_path}")


if __name__ == '__main__':
    main()
为什么不直接查找 Gender_Data 元素,只把查找到的结果写入文件呢:
import xml.etree.ElementTree as ET
from io import StringIO
# Sample SOAP request used as the input document.
xml_tree = """<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:bsvc="urn:com.workday/bsvc">
<soapenv:Header>
<bsvc:Workday_Common_Header>
<!--Optional:-->
<bsvc:Include_Reference_Descriptors_In_Response>?</bsvc:Include_Reference_Descriptors_In_Response>
</bsvc:Workday_Common_Header>
</soapenv:Header>
<soapenv:Body>
<bsvc:Gender_Data bsvc:version="?">
<!--Optional:-->
<bsvc:Country_Reference bsvc:Descriptor="?">
<!--Zero or more repetitions:-->
<bsvc:ID bsvc:type="?">?</bsvc:ID>
</bsvc:Country_Reference>
<!--You have a CHOICE of the next 2 items at this level-->
<!--Optional:-->
<bsvc:Is_Female>?</bsvc:Is_Female>
<!--Optional:-->
<bsvc:Is_Male>?</bsvc:Is_Male>
<bsvc:Gender_Description>?</bsvc:Gender_Description>
<bsvc:Gender_Code>?</bsvc:Gender_Code>
<!--Optional:-->
<bsvc:Inactive>?</bsvc:Inactive>
</bsvc:Gender_Data>
</soapenv:Body>
</soapenv:Envelope>"""

# Prefix -> URI table, used both for the lookup and for serialization.
nsmap = {
    'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/',
    'bsvc': 'urn:com.workday/bsvc',
}

# Register the prefixes so the writer emits soapenv:/bsvc: rather than
# auto-generated ns0:/ns1: names.
for prefix, uri in nsmap.items():
    ET.register_namespace(prefix, uri)

# Parse the sample; the default parser does not keep comments.
f = StringIO(xml_tree)
root = ET.parse(f).getroot()

# Locate the payload element inside the SOAP body.
gen = root.find(".//bsvc:Gender_Data", nsmap)

# Build the output document: <root> -> <soapenv:Envelope> -> payload.
new_root = ET.Element("root")
Envelope = ET.SubElement(new_root, ET.QName(nsmap['soapenv'], 'Envelope'))
Envelope.append(gen)

# Pretty-print, write to file, and echo the result to stdout.
tree = ET.ElementTree(new_root)
ET.indent(tree, space=" ")
tree.write("test.xml", xml_declaration=True, encoding="utf-8")
ET.dump(tree)
输出:
<root xmlns:bsvc="urn:com.workday/bsvc" xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
<soapenv:Envelope>
<bsvc:Gender_Data bsvc:version="?">
<bsvc:Country_Reference bsvc:Descriptor="?">
<bsvc:ID bsvc:type="?">?</bsvc:ID>
</bsvc:Country_Reference>
<bsvc:Is_Female>?</bsvc:Is_Female>
<bsvc:Is_Male>?</bsvc:Is_Male>
<bsvc:Gender_Description>?</bsvc:Gender_Description>
<bsvc:Gender_Code>?</bsvc:Gender_Code>
<bsvc:Inactive>?</bsvc:Inactive>
</bsvc:Gender_Data>
</soapenv:Envelope>
</root>