当我加载这个链接时,我得到一个JSON对象。该对象包含XML数据。
我需要按类名遍历这些标签并获取其中的数据,我尤其关心类名为job-entry的数据。
我的做法是:首先通过json2xml函数将这个JSON数据转换为XML,这一步成功地得到了XML。
然后我希望遍历这个XML的DOM,所以我这样做了:
// Convert the loaded JSON (json_string) to XML text with json2xml, which is defined elsewhere.
var xml_string = json2xml(json_string);
// Parse that XML text into a DOM document so it can be traversed.
var parser = new DOMParser();
var xmlDoc = parser.parseFromString(xml_string, "text/xml");
其中,json_string是我从链接加载的那个对象。然后我解析它。按照这篇教程,我现在正在遍历DOM,但收到了以下错误:
undefined is not a constructor (evaluating 'xml_string.getElementsByClassName("job-entry")')
下面是我遍历DOM的方式,我不知道自己遗漏了什么。请注意,我使用CasperJS加载JSON。
// URL that CasperJS loads; its page content is parsed as JSON below.
var jobsURL = "https://de.dpdhl.jobs/search-jobs/results?ActiveFacetID=0&CurrentPage=1&RecordsPerPage=20&Distance=50&ShowRadius=False&SearchResultsModuleName=Search+Results&SearchFiltersModuleName=Search+Filters&SortCriteria=0&SortDirection=1&SearchType=5";
casper.start(jobsURL);
casper.then(function() {
// Despite its name, json_string holds the value returned by JSON.parse, not raw text.
var json_string = JSON.parse(this.getPageContent());
// Convert to XML text with json2xml (defined elsewhere), then parse it into a DOM document.
var xml_string = json2xml(json_string);
var parser = new DOMParser();
var xmlDoc = parser.parseFromString(xml_string, "text/xml");
// NOTE(review): the lookup below is made on xml_string (the json2xml result), not on the parsed xmlDoc.
console.log(xml_string.getElementsByClassName("job-entry")[0].textContent);
})
casper.run();
有几个问题阻碍您实现目标。
第1点:json2xml生成的XML似乎包含错误。
XML Validator报告了以下错误:
XML Parsing Error: not well-formed
Location: https://codebeautify.org/xmlvalidator
Line Number 20, Column 217:
<input type="checkbox" autocomplete="off" id="category-filter-2" class="filter-checkbox" data-facet-type="1" data-id="23821" data-count="15" data-display="Division Customer Solutions & Innovation" data-field-name="" />
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------^
第2点:您试图在xml_string而不是xmlDoc上调用getElementsByClassName()。
第3点:getElementsByClassName()不适用于XML元素,它是一个HTML函数。
在选择XML中的元素时,最好使用XPath(XML路径语言)。
由于您的XML无效,我将使用示例XML为您的问题提供解决方案。
CasperJS脚本:
// Address passed to casper.start(). The XPath demonstration below runs on
// inline sample XML rather than on the response from this URL.
var jobsURL = 'https://de.dpdhl.jobs/search-jobs/results?ActiveFacetID=0&CurrentPage=1&RecordsPerPage=20&Distance=50&ShowRadius=False&SearchResultsModuleName=Search+Results&SearchFiltersModuleName=Search+Filters&SortCriteria=0&SortDirection=1&SearchType=5';
var casper = require('casper').create();
casper.start(jobsURL);
casper.then(function () {
  // Sample XML
  var xml_string = '<omega><root><tower>716379923</tower><job class="job-entry">Veterinary Physician</job><modern>red</modern><written>individual</written><low>help</low><other>689533135.2394588</other></root><root><tower>234423546</tower><job class="job-entry">Software Developer</job><modern>green</modern><written>multiple</written><low>pain</low><other>99999.11111111111</other></root><root><tower>002229385</tower><job class="job-entry">Car Salesman</job><modern>brown</modern><written>absence</written><low>love</low><other>23408.55728904901</other></root></omega>';
  var parser = new DOMParser();
  var xmlDoc = parser.parseFromString(xml_string, 'text/xml');
  // Select every element whose class attribute is exactly "job-entry".
  // An ordered iterator is requested explicitly: XPathResult.ANY_TYPE resolves
  // to an unordered node iterator for node-set results, which does not
  // guarantee that matches come back in document order.
  var job_entry = xmlDoc.evaluate('//*[@class="job-entry"]', xmlDoc, null, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
  // iterateNext() returns null once the matches are exhausted, ending the loop.
  var current_job_entry = job_entry.iterateNext();
  while (current_job_entry) {
    this.echo(current_job_entry.textContent);
    current_job_entry = job_entry.iterateNext();
  }
});
casper.run();
结果:
Veterinary Physician
Software Developer
Car Salesman
示例XML:
<omega>
<root>
<tower>716379923</tower>
<job class="job-entry">Veterinary Physician</job>
<modern>red</modern>
<written>individual</written>
<low>help</low>
<other>689533135.2394588</other>
</root>
<root>
<tower>234423546</tower>
<job class="job-entry">Software Developer</job>
<modern>green</modern>
<written>multiple</written>
<low>pain</low>
<other>99999.11111111111</other>
</root>
<root>
<tower>002229385</tower>
<job class="job-entry">Car Salesman</job>
<modern>brown</modern>
<written>absence</written>
<low>love</low>
<other>23408.55728904901</other>
</root>
</omega>