使用excel VBA从运行脚本以显示表数据的网页中获取数据

问题描述 投票:1回答:1

研究这一天的第二天。我只是没有得到它。网页是公开的:https://register.fca.org.uk/ShPo_FirmDetailsPage?id=001b000000MfF1EAAV手动我pgdn x 2到达按钮[+]个人,点击它然后pgdn x 1进入“每页结果”下拉并将其更改为500.然后复制并粘贴结果成为卓越

这是我在这个网站上找到的代码“从网上插入数据时选择一个下拉列表(VBA)”由QHarr回答,我试图适应并失败了。我把“帮助”放在我认为我应该做出改变的地方,但我只是在猜测

Public Sub MakeSelectiongGetData()
Dim IE As New InternetExplorer
Const URL = "https://register.fca.org.uk/ShPo_FirmDetailsPage?id=001b000000Mfe5TAAR#ShPo_FirmDetailsPage"
'Const optionText As String = "RDVT11"
Application.ScreenUpdating = False
With IE
    .Visible = True
    .navigate URL

    While .Busy Or .readyState < 4: DoEvents: Wend

    Dim a As Object
    Set a = .document.getElementById("HELP")

    Dim currentOption As Object
    For Each currentOption In a.getElementsByTagName("HELP")
        If InStr(currentOption.innerText, optionText) > 0 Then
            currentOption.Selected = "HELP"
            Exit For
        End If
    Next currentOption
    .document.getElementById("HELP").Click
    While .Busy Or .readyState < 4: DoEvents: Wend

    Dim nTable As HTMLTable

    Do: On Error Resume Next: Set nTable = .document.getElementById("HELP"): On Error GoTo 0: DoEvents: Loop While nTable Is Nothing

    Dim nRow As Object, nCell As Object, r As Long, c As Long

    With ActiveSheet
        Dim nBody As Object
        Set nBody = nTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")
        .Cells(1, 1) = nBody(0).innerText
        For r = 2 To nBody.Length - 1
            Set nRow = nBody(r)
            For Each nCell In nRow.Cells
                c = c + 1: .Cells(r + 1, c) = nCell.innerText
            Next nCell
            c = 0
      Next r
End With
.Quit
End With
Application.ScreenUpdating = True
End Sub

所以我已经把你的变化包括在内了。

Public Sub MakeSelections()
Dim IE As New InternetExplorer
With IE
    .Visible = True
    .Navigate2 "https://register.fca.org.uk/ShPo_FirmDetailsPage?id=001b000000MfF1EAAV"

    While .Busy Or .readyState < 4: DoEvents: Wend

    .document.querySelector("[href*=FirmIndiv]").Click '<==click the + for indiv
    .document.querySelector("#IndividualSearchResults_length[value='500']").Selected = True
End With

Dim nTable As HTMLTable

Do: On Error Resume Next: Set nTable =IE.document.getElementById("IndividualSearchResults"): On Error GoTo 0: DoEvents: Loop While nTable Is Nothing

Dim nRow As Object, nCell As Object, r As Long, c As Long

With ActiveSheet
    Dim nBody As Object
    Set nBody = nTable.getElementsByTagName("Name")(0) _
                      .getElementsByTagName("ShG1_IRN_c") _
                      .getElementsByTagName("ShGl_IndividualStatus__c") _
                      .getElementsByTagName("ShPo_Registerstatus__c") _
                      .getElementsByTagName("Id") _
                      .getElementsByTagName("RecordTypeId") _
                      .getElementsByTagName("CurrencyIsoCode") _
    .Cells(1, 1) = nBody(0).innerText
    For r = 2 To nBody.Length - 1
        Set nRow = nBody(r)
        For Each nCell In nRow.Cells
            c = c + 1: .Cells(r + 1, c) = nCell.innerText
        Next nCell
        c = 0
    Next r
End With

End Sub
html excel vba internet-explorer web-scraping
1个回答
0
投票

您可以使用css attribute = value选择器来定位+个人,也可以选择500

 Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub MakeSelections()
    Dim IE As New InternetExplorer
    With IE
        .Visible = True
        .Navigate2 "https://register.fca.org.uk/ShPo_FirmDetailsPage?id=001b000000MfF1EAAV"

        While .Busy Or .readyState < 4: DoEvents: Wend

        .document.querySelector("[href*=FirmIndiv]").Click '<==click the + for indiv
        .document.querySelector("#IndividualSearchResults_length [value='500']").Selected = True

        Dim event_onchange As Object
        Set event_onchange = .document.createEvent("HTMLEvents")
        event_onchange.initEvent "change", True, False

        .document.querySelector("[name=IndividualSearchResults_length]").dispatchEvent event_onchange

        Application.Wait Now + TimeSerial(0, 0, 5)
        Dim clipboard As Object, ws As Worksheet

        Set clipboard = GetObject("New:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
        Set ws = ThisWorkbook.Worksheets("Sheet1")
        clipboard.SetText .document.querySelector("#IndividualSearchResults").outerHTML
        clipboard.PutInClipboard
        ws.Cells(1, 1).PasteSpecial
        .Quit
    End With
End Sub

这个选择器[href*=FirmIndiv]是一个带有contains(*)修饰符的attribute = value选择器。它查找包含href值中子字符串FirmIndivhref属性的匹配项。 querySelector HTMLDocument *(ie.Document)的所有方法都将返回找到的第一个匹配项。

你可以在这里看到匹配:

option标记元素的选择器(结果计数的父select标记包含子option标记元素):

#IndividualSearchResults_length [value='500']

它使用id (#) selector来定位父级select元素的div父级,通过其id值IndividualSearchResults_length,然后使用descendant combinator(“”),后跟attribute = value选择器,用option = value指定500元素。

你可以在这里看到:

enter image description here


Selenium基本版:

Option Explicit 
Public Sub MakeChanges()
'VBE > Tools > References > Selenium Type Library
'Download: https://github.com/florentbr/SeleniumBasic/releases/tag/v2.0.9.0
    Dim d As WebDriver
    Set d = New ChromeDriver
    Const url = "https://register.fca.org.uk/ShPo_FirmDetailsPage?id=001b000000MfF1EAAV"

    With d
        .Start "Chrome"
        .get url
        .FindElementByCss("[href*=FirmIndiv]").Click
         .FindElementByCss("[name=IndividualSearchResults_length]").WaitDisplayed True, 10000
         .FindElementByCss("[name=IndividualSearchResults_length]").AsSelect.SelectByValue "500"
        Stop                                     '<==delete me later
        .Quit
    End With
End Sub
© www.soinside.com 2019 - 2024. All rights reserved.