XSLT从HTML xml中提取值

问题描述 投票:0回答:1

我有一个xml

而且，我需要提取所生成表格中的值，特别是每一行第2列和第3列的值。

HTML看起来像

table

而xml看起来像:

<DIV><DIV><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><TR></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD align="LEFT" colspan="5" style="border: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Nutrition</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Typical Values</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">Per 100g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">One tart (125g)</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">%RI*</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">RI*</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Energy</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1373kJ / 329kcal</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1717kJ / 411kcal</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">20%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">8400kJ / 2000kcal</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid 
black;padding-left: 3px;"><SPAN style="font-size: inherit;">Fat</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">20.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">25.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">36%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">70g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Saturates</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">11.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">14.0g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">70%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">20g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Carbohydrate</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">32.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">41.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">16%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">260g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Sugars</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">16.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 
122px;"><SPAN style="font-size: inherit;">20.2g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">22%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">90g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Fibre</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1.3g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">1.6g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">&nbsp;</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">&nbsp;</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Protein</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">3.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">4.9g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;"><SPAN style="font-size: inherit;">10%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;"><SPAN style="font-size: inherit;">50g</SPAN></TD></TR><TR><TD colspan="1" style="width: 160px;border-left: 1px solid black;border-bottom: 1px solid black;padding-left: 3px;"><SPAN style="font-size: inherit;">Salt</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">0.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">0.1g</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-bottom: 1px solid 
black;"><SPAN style="font-size: inherit;">2%</SPAN></TD><TD align="CENTER" colspan="1" style="width: 122px;border-right: 1px solid black;border-bottom: 1px solid black;"><SPAN style="font-size: inherit;">6g</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"><COL width="122px"><COL width="122px"><COL width="122px"><COL width="122px"></COLGROUP><TR></TR><TR><TD colspan="5" style="border-left: 1px solid black;border-right: 1px solid black;"><SPAN style="font-size: inherit;">Contains 2 servings</SPAN></TD></TR></TABLE></TD></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><COLGROUP><COL width="160px"></COLGROUP><TR></TR><TR><TD colspan="1"><TABLE border-collapse="collapse" cellspacing="0" style="width: 650px;"><TR></TR><TR><TD colspan="1" style="border-left: 1px solid black;border-right: 1px solid black;border-bottom: 1px solid black;padding-left: 3px;"><P><SPAN>* Reference intake of an average adult (8400 kJ / 2000 kcal)</SPAN></P></TD></TR></TABLE></TD></TR></TABLE></TD></TR></TABLE></DIV></DIV>

我尝试了什么:我需要将值存储在xslt中的变量中。

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <!--
        Outputs the string value of every SPAN element in the input document.
        Instructions must be in the XSLT namespace (xsl: prefix) and must live
        inside a template; un-prefixed elements placed directly under
        xsl:stylesheet are not executed as XSLT instructions.
    -->
    <xsl:template match="/">
        <xsl:for-each select="//SPAN">
            <xsl:value-of select="." />
        </xsl:for-each>
    </xsl:template>
</xsl:stylesheet>

我该如何获取这些值？具体来说，我想得到：

EnergyCol2

EnergyCol3

这两个值，并且希望把它们保存在变量中。我怎样才能判断某个值属于第2列（或第3列），以及它属于哪种类型（能量、脂肪等）？

xml xslt html-table xslt-1.0
1个回答
-1
投票

虽然这不能回答这个问题,因为我使用了正则表达式来解析html xml,但它仍然可以完成我的工作。所以,我从XSLT调用java函数。

和java代码:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts nutrient values from an HTML nutrition table using a single regular expression.
 *
 * <p>Usage: call {@link #process(String)} with the markup, and if it returns {@code true},
 * read the individual values through the {@code get...Per100()} / {@code get...PerServ()}
 * accessors. For every row label the pattern captures the contents of the next two
 * {@code <SPAN>} elements that follow the label.
 *
 * <p>Not thread-safe: the state of the most recent match is kept in a single static field
 * that is overwritten by every call to {@link #process(String)}.
 */
public class NutrientValues {

    /** Row labels, in the order in which they must occur in the document. */
    private static final String[] ROW_LABELS = {
        "Energy", "Fat", "Saturates", "Carbohydrate", "Sugars", "Fibre", "Protein", "Salt"
    };

    /** Lazily skips to, and captures the contents of, the next two {@code <SPAN>} elements. */
    private static final String TWO_SPANS =
            ".*?<SPAN.*?>(.*?)</SPAN>.*?<SPAN.*?>(.*?)</SPAN>";

    /** Full expression: two capture groups per label, so label i owns groups 2i+1 and 2i+2. */
    private static final String regex = buildRegex();

    // DOTALL (not MULTILINE): the pattern has no ^/$ anchors, but '.' has to be able to
    // cross line terminators because the markup may be wrapped over several lines.
    private static final Pattern pattern = Pattern.compile(regex, Pattern.DOTALL);

    /** Matcher of the most recent {@link #process(String)} call; {@code null} before the first call. */
    private static Matcher matcher;

    /**
     * Builds the expression from {@link #ROW_LABELS} so that every row uses exactly the same
     * fragment and a typo in one repetition cannot silently break the whole pattern.
     */
    private static String buildRegex() {
        StringBuilder expression = new StringBuilder();
        for (int i = 0; i < ROW_LABELS.length; i++) {
            if (i > 0) {
                // Lazily skip the remaining cells of the previous row.
                expression.append(".*?");
            }
            expression.append(ROW_LABELS[i]).append(TWO_SPANS);
        }
        return expression.toString();
    }

    /**
     * Searches the given markup for the nutrition rows and remembers the match.
     *
     * @param htmldoc the markup to search; {@code null} is treated as "no match"
     * @return {@code true} if all rows were found and the accessors may be called
     */
    public static boolean process(String htmldoc) {
        if (htmldoc == null) {
            // Drop any earlier match so that stale values cannot be read after a failed call.
            matcher = null;
            return false;
        }
        matcher = pattern.matcher(htmldoc);
        return matcher.find();
    }

    /**
     * Returns one capture group of the current match.
     *
     * @throws IllegalStateException if {@link #process(String)} has not been called yet, or
     *         its most recent call did not find a match
     */
    private static String group(int index) {
        if (matcher == null) {
            throw new IllegalStateException(
                    "No match available: call process(String) successfully before reading values");
        }
        return matcher.group(index);
    }

    /** @return the first value captured after the "Energy" label */
    public static String getEnergyPer100() {
        return group(1);
    }

    /** @return the second value captured after the "Energy" label */
    public static String getEnergyPerServ() {
        return group(2);
    }

    /** @return the first value captured after the "Fat" label */
    public static String getFatPer100() {
        return group(3);
    }

    /** @return the second value captured after the "Fat" label */
    public static String getFatPerServ() {
        return group(4);
    }

    /** @return the first value captured after the "Saturates" label */
    public static String getSaturatesPer100() {
        return group(5);
    }

    /** @return the second value captured after the "Saturates" label */
    public static String getSaturatesPerServ() {
        return group(6);
    }

    /** @return the first value captured after the "Carbohydrate" label */
    public static String getCarbohydratePer100() {
        return group(7);
    }

    /** @return the second value captured after the "Carbohydrate" label */
    public static String getCarbohydratePerServ() {
        return group(8);
    }

    /** @return the first value captured after the "Sugars" label */
    public static String getSugarsPer100() {
        return group(9);
    }

    /** @return the second value captured after the "Sugars" label */
    public static String getSugarsPerServ() {
        return group(10);
    }

    /** @return the first value captured after the "Fibre" label */
    public static String getFibrePer100() {
        return group(11);
    }

    /** @return the second value captured after the "Fibre" label */
    public static String getFibrePerServ() {
        return group(12);
    }

    /** @return the first value captured after the "Protein" label */
    public static String getProteinPer100() {
        return group(13);
    }

    /** @return the second value captured after the "Protein" label */
    public static String getProteinPerServ() {
        return group(14);
    }

    /** @return the first value captured after the "Salt" label */
    public static String getSaltPer100() {
        return group(15);
    }

    /** @return the second value captured after the "Salt" label */
    public static String getSaltPerServ() {
        return group(16);
    }
}

结果:

Group 1: 1373kJ / 329kcal
Group 2: 1717kJ / 411kcal
Group 3: 20.0g
Group 4: 25.0g
Group 5: 11.2g
Group 6: 14.0g
Group 7: 32.9g
Group 8: 41.1g
Group 9: 16.2g
Group 10: 20.2g
Group 11: 1.3g
Group 12: 1.6g
Group 13: 3.9g
Group 14: 4.9g
Group 15: 0.1g
Group 16: 0.1g
© www.soinside.com 2019 - 2024. All rights reserved.