解析XML的最快方法

问题描述 投票:0回答:1

我有巨大的XML文件,每个大约1GB。由于数据量太大,即使在Notepad++中也无法打开。

我编辑了XML,并且能够通过DOMDocument60来解析它(感谢stackflow提供的帮助)。

我也曾读过类似的问题 提高VBA的速度 但我还是无法正常实施,所以需要一些指导。

比如说

  1. 如何在SAX中加载xml?在SAX中读取之前,是否需要在DOMDocument60中加载?
  2. 导入后如何在SAX中逐行读取?在DOMDocument60中,我可以很容易地跳转到任何一个节点,然后在子节点中循环,但不知道如何在SAX中做到这一点?
  3. 与DOMDocument相比,SAX在解析巨大的XML时有什么时间差?我没有找到任何实时的例子。
  4. 有没有比VBA中的SAX更好的选择,就像其他库一样,我可以用它来加快Parsing的过程。

感谢你的建议。(XML文件样本如下)

<ParentNode type="actual">

<SampleObject class="POC" version="XYZ123" distName="Test1" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">0</p>
  <p name="object2">6</p>
  <p name="object3">0</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test2" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">2</p>
  <p name="object2">10</p>
  <p name="object4">4</p>
  <p name="object3">6</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test3" id="Sample">
  <p name="name">POC1</p>
  <p name="object2">90</p>
  <p name="object3">0</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test4" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">2</p>
  <p name="object2">10</p>
  <p name="object4">40</p>
  <p name="object3">61</p>
</SampleObject>

xml vba ms-access xml-parsing
1个回答
0
投票

这是我按照我上面发的链接,用你的XML样本得到的结果.只是输出到立即窗口。我不知道你在做什么与提取的数据... ...

测试方法在常规模块。

Sub Tester()

    Const FNAME As String = "example.xml"
    Dim rdr As New MSXML2.SAXXMLReader30
    Dim cnth As New ContentHandler

    Set rdr.ContentHandler = cnth
    rdr.parseURL ThisWorkbook.Path & "\" & FNAME  'test xml file is in same folder as the workbook

End Sub

类模块 ContentHandler:

Option Explicit

Implements IVBSAXContentHandler

Dim cls, vers, distName, id, pName, pContent
Dim inSO As Boolean, inP As Boolean

Private Sub IVBSAXContentHandler_characters(strChars As String)
    If inP Then Debug.Print "P content:", strChars
End Sub

Private Sub IVBSAXContentHandler_startElement(strNamespaceURI As String, _
                             strLocalName As String, strQName As String, _
                             ByVal oAttributes As MSXML2.IVBSAXAttributes)
    Select Case strLocalName
        Case "SampleObject"
            inSO = True
            cls = oAttributes.getValueFromName("", "class")
            vers = oAttributes.getValueFromName("", "version")
            distName = oAttributes.getValueFromName("", "distName")
            id = oAttributes.getValueFromName("", "id")
            Debug.Print "Start", strLocalName, cls, vers, distName, id
        Case "p"
            inP = True
            pName = oAttributes.getValueFromName("", "name")
            Debug.Print "Start", strLocalName, pName
    End Select
End Sub

Private Sub IVBSAXContentHandler_endElement(strNamespaceURI As String, strLocalName As String, strQName As String)
    Select Case strLocalName
        Case "SampleObject"
            inSO = False
            cls = ""
            vers = ""
            distName = ""
            id = ""
        Case "p"
            pName = ""
            inP = False
    End Select
End Sub

Private Property Set IVBSAXContentHandler_documentLocator( _
                            ByVal RHS As MSXML2.IVBSAXLocator)
End Property

Private Sub IVBSAXContentHandler_startDocument()
End Sub

Private Sub IVBSAXContentHandler_endDocument()
End Sub

Private Sub IVBSAXContentHandler_endPrefixMapping(strPrefix As String)
End Sub

Private Sub IVBSAXContentHandler_ignorableWhitespace(strChars As String)
End Sub

Private Sub IVBSAXContentHandler_processingInstruction(strTarget As String, strData As String)
End Sub

Private Sub IVBSAXContentHandler_skippedEntity(strName As String)
End Sub

Private Sub IVBSAXContentHandler_startPrefixMapping(strPrefix As String, strURI As String)
End Sub

0
投票

我试图在 "IVBSAXContentHandler_StartElement "中输入一个新的Sub(在每个选择的情况下),但它产生了一个错误。

Public Sub Ins_2G1(strLocalName As String, cls As String, vers As String, distName As String, id As String)
    Dim DNameArr() As String
    Dim insertcol As String
    Dim insertval As String

    DNameArr() = Split(distName, "/")
    colvalues(0) = distName
    colvalues(1) = DNameArr(1)
    colvalues(2) = DNameArr(2)
    colvalues(3) = DNameArr(3)

    'Converting Generated Parameter Name Array in to String
    insertcol = ""
    For i = LBound(colnames) To UBound(colnames)
        insertcol = insertcol + CStr(colnames(i))
        If i < UBound(colnames) Then
            insertcol = insertcol + ","
        End If
    Next

    'Converting Generated Value Array in to String
    insertval = "'"
    For i = LBound(colvalues) To UBound(colvalues) + 3
        insertval = insertval + CStr(colvalues(i))
        If i < UBound(colvalues) Then
            insertval = insertval + "','"
        End If
    Next
    insertval = insertval + "'"

    'Inserting Record in to POC table
    strSql = "INSERT INTO [" & cls & "] (" & insertcol & ") VALUES (" & insertval & ");"
    db.Execute strSql


End Sub
© www.soinside.com 2019 - 2024. All rights reserved.