我试图解析一个XHTML文件,并获得属性和它的值。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
/*
 * Recursively walks a list of sibling libxml2 nodes starting at a_node.
 * For every node it prints the node name, then the name of each attribute
 * in the node's properties list, and finally recurses into the node's
 * children. A NULL a_node ends the walk immediately (the loop does not run).
 */
void walkTree(xmlNode * a_node)
{
xmlNode *cur_node = NULL;
xmlAttr *cur_attr = NULL;
for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
// Print the tag's name, then each of its attributes.
printf("Got tag : %s\n", cur_node->name);
for (cur_attr = cur_node->properties; cur_attr; cur_attr = cur_attr->next) {
printf(" -> with attribute : %s\n", cur_attr->name);
// NOTE(review): this prints the *name* of the attribute's first child node,
// not that child's content; the output quoted in this file shows "text" here
// for every attribute. cur_attr->children is also dereferenced without a
// NULL check.
printf(" -> with Value: %s\n", (cur_attr->children)->name);
}
// Descend into the child list of the current node.
walkTree(cur_node->children);
}
}
int main(void)
{
// Load XHTML
char *data;
data = "<html><body class=\"123\" damn=\"123\"></html>";
int len = strlen(data) + 1;
htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, 0);
htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
htmlParseChunk(parser, data, len, 0);
htmlParseChunk(parser, NULL, len, 1);
walkTree(xmlDocGetRootElement(parser->myDoc));
}
我希望得到这样的输出
Got tag: html
Got tag: body
-> with attribute: class
-> with value: 123
-> with attribute: damn
-> with value: 123
但很不幸,我得到了这个输出。
Got tag: html
Got tag: body
-> with attribute: class
-> with value: text
-> with attribute: damn
-> with value: text
我也试过其他 HTML 代码,不管属性值是什么,它总是显示 "text" 而不是实际的值。
如何解决这个问题?如何获得真正的属性值?
可以使用 xmlNodeGetContent(),因为它可以直接作用于当前遍历到的节点,而不像 xmlGetProp() 那样需要元素节点(xmlNodePtr)再加上属性名。所以在遍历 xmlNode 或 xmlAttr 时,最好使用 xmlNodeGetContent() 来获取属性的值。希望能帮到你 :)
这里是解决方案。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <libxml/HTMLparser.h>
#include <libxml/xmlmemory.h>
#include <libxml/tree.h>
#include <libxml/parser.h>
/*
 * Recursively walks a list of sibling libxml2 nodes starting at a_node.
 * For every node it prints the node name, then the name and value of each
 * attribute, and finally recurses into the node's children.
 * A NULL a_node ends the walk immediately.
 */
void walkTree(xmlNode * a_node)
{
    xmlNode *cur_node = NULL;
    xmlAttr *cur_attr = NULL;
    xmlChar *attr;

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        printf("Got tag : %s\n", cur_node->name);
        for (cur_attr = cur_node->properties; cur_attr; cur_attr = cur_attr->next) {
            printf(" -> with attribute : %s\n", cur_attr->name);
            /*
             * xmlNodeGetContent() is declared for xmlNode pointers, so the
             * attribute pointer is cast explicitly. The returned string is
             * allocated by libxml2 and owned by the caller.
             */
            attr = xmlNodeGetContent((xmlNode *)cur_attr);
            /* A NULL result must not reach printf("%s"); print it as empty. */
            printf(" -> with Value: %s\n", attr != NULL ? (const char *)attr : "");
            if (attr != NULL) {
                xmlFree(attr);
            }
        }
        walkTree(cur_node->children);
    }
}
int main(void)
{
// Load XHTML
char *data;
data = "<html><body class=\"123\" damn=\"123\"></html>";
int len = strlen(data) + 1;
htmlParserCtxtPtr parser = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, 0);
htmlCtxtUseOptions(parser, HTML_PARSE_NOBLANKS | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING | HTML_PARSE_NONET);
htmlParseChunk(parser, data, len, 0);
htmlParseChunk(parser, NULL, len, 1);
walkTree(xmlDocGetRootElement(parser->myDoc));
}
与其直接在属性结构中摸索,不如使用 xmlGetProp。
参考此处给出的示例:
/*
 * Scans the direct children of cur and, for every child named "reference",
 * prints the value of its "uri" attribute.
 *
 * doc is not used by this function; it is kept so the signature stays the same.
 * cur may be NULL, in which case nothing is printed.
 * Children named "reference" that have no "uri" attribute are skipped.
 */
void
getReference (xmlDocPtr doc, xmlNodePtr cur) {
    xmlChar *uri;

    (void)doc;
    if (cur == NULL) {
        return;
    }
    cur = cur->xmlChildrenNode;
    while (cur != NULL) {
        if ((!xmlStrcmp(cur->name, (const xmlChar *)"reference"))) {
            /* libxml2 string parameters are xmlChar, so the literal is cast. */
            uri = xmlGetProp(cur, (const xmlChar *)"uri");
            /* xmlGetProp() returns NULL when the attribute is absent. */
            if (uri != NULL) {
                printf("uri: %s\n", (const char *)uri);
                /* The returned copy is owned by the caller. */
                xmlFree(uri);
            }
        }
        cur = cur->next;
    }
    return;
}
我认为原因是:属性节点的子节点是一个文本节点,属性值保存在该文本节点的内容(content)中,而不是名字(name)中。使用库提供的 API 可以把这些细节隐藏起来,而且还附带一些错误检查(例如 children 为 NULL 的情况)。
对Khaled的回答做一个小小的更正。
/*
 * Recursively walks a list of sibling libxml2 nodes starting at a_node.
 * For every node it prints the node name, then the name and value of each
 * attribute, and finally recurses into the node's children.
 * A NULL a_node ends the walk immediately.
 */
void walkTree(xmlNode * a_node)
{
    xmlNode *cur_node = NULL;
    xmlAttr *cur_attr = NULL;
    xmlChar *attr;

    for (cur_node = a_node; cur_node; cur_node = cur_node->next) {
        printf("Got tag : %s\n", cur_node->name);
        for (cur_attr = cur_node->properties; cur_attr; cur_attr = cur_attr->next) {
            printf(" -> with attribute : %s\n", cur_attr->name);
            /*
             * Read the value from the attribute's child node. The returned
             * string is allocated by libxml2 and owned by the caller.
             * NOTE(review): only the first child is read; presumably the
             * value is a single text node here - confirm for inputs that
             * contain entity references.
             */
            attr = xmlNodeGetContent(cur_attr->children);
            /* An attribute without children yields NULL; print it as empty. */
            printf(" -> with Value: %s\n", attr != NULL ? (const char *)attr : "");
            if (attr != NULL) {
                xmlFree(attr);
            }
        }
        walkTree(cur_node->children);
    }
}
如果不做这个更正,同一个标签的不同属性会打印出相同的属性值。