网页抓取 - 标签(getElementsByTagName)问题

问题描述 投票:0回答:1

我刚开始学习抓取网页数据,也刚开始使用 For ... Next 循环。我正在尝试从网站获取数据(所有页面),但代码似乎有问题,因为运行时出现了错误 91。代码如下:

' Module-level, late-bound Internet Explorer automation object.
' It is assigned in connect and read by id_tr_td_for, so connect must run first.
Dim ie As Object

Sub connect()
    ' Starts Internet Explorer, opens the men's 100m world-rankings page and
    ' blocks until the page has finished loading.
    '
    ' Side effects: assigns the module-level variable ie and makes the
    ' browser window visible.
    Set ie = CreateObject("INTERNETEXPLORER.APPLICATION")
    ie.NAVIGATE "https://www.worldathletics.org/world-rankings/100m/men"
    ie.Visible = True

    ' NAVIGATE returns immediately. Without this wait a macro run right after
    ' connect can see a missing document/element and fail with run-time
    ' error 91. readyState 4 means the document is completely loaded.
    Do While ie.Busy Or ie.readyState <> 4
        DoEvents ' keep Excel responsive while the page loads
    Loop
End Sub

Sub id_tr_td_for()
    ' Copies up to 11 rows x 6 columns of <td> text from the element with
    ' id "toplists" (in the page loaded by connect) into the active sheet.
    ' Table row i / cell j is written to Cells(i + 1, j + 1).
    '
    ' Raises error 91 with a descriptive message when the browser has not
    ' been started or the "toplists" element is missing. Rows with fewer
    ' than six <td> cells are copied as far as they go instead of failing;
    ' presumably the header row uses <th>, so it yields an empty sheet row
    ' (TODO confirm against the live page).
    Const LAST_ROW_INDEX As Long = 10
    Const LAST_COL_INDEX As Long = 5

    Dim tableHost As Object
    Dim tableRows As Object
    Dim rowCells As Object
    Dim i As Long
    Dim j As Long

    If ie Is Nothing Then
        Err.Raise 91, "id_tr_td_for", "Internet Explorer has not been started - run connect first."
    End If

    ' Make sure the page is fully loaded before touching the DOM.
    Do While ie.Busy Or ie.readyState <> 4
        DoEvents
    Loop

    Set tableHost = ie.document.getElementById("toplists")
    If tableHost Is Nothing Then
        Err.Raise 91, "id_tr_td_for", "Element with id ""toplists"" was not found in the loaded page."
    End If

    ' Only wipe the old output once we know there is something to read.
    Range("a1:z10000").ClearContents

    Set tableRows = tableHost.getElementsByTagName("tr")
    For i = 0 To LAST_ROW_INDEX
        If i >= tableRows.Length Then Exit For ' fewer rows than requested
        Set rowCells = tableRows(i).getElementsByTagName("td")
        For j = 0 To LAST_COL_INDEX
            If j >= rowCells.Length Then Exit For ' short row (e.g. header)
            Cells(i + 1, j + 1) = rowCells(j).innerText
        Next j
    Next i
End Sub

有人可以帮我解决这个问题,并告诉我如何列出(抓取)所有页面吗?

谢谢。

excel vba web-scraping getelementsbytagname
1个回答
0
投票

我不确定错误出在哪里,我这边也会遇到同样的错误。

以下代码应该会有所帮助,它将指定页面的表内容打印到调试窗口。

以下代码会将所选页面的所有数据复制到名为“Sheet1”的工作表中。

您需要先在 VBA 编辑器中添加两个引用才能使用这段代码(“工具”菜单 → “引用”,然后找到并勾选它们):Microsoft HTML Object Library 和 Microsoft Internet Controls。

' Last page number fetched by Start, which loops from page 1 to MaxPage.
Const MaxPage = 2 ' set to 26 (or however many there are) - at 2 for testing purposes
' Shared Internet Explorer instance: created on demand in Start and
' navigated by LoadPage. Requires the "Microsoft Internet Controls" reference.
Dim Browser As InternetExplorer

Sub Start()
    ' Scrapes pages 1..MaxPage of the rankings table and writes the first six
    ' <td> cells of every body row to worksheet "Sheet1", one sheet row per
    ' table row, starting at row 2 (row 1 is left for a header).
    '
    ' The browser is always closed and the module-level Browser variable is
    ' reset, even when an error occurs, so the macro can be run repeatedly.
    ' Any error is re-raised to the caller after that clean-up.
    Dim Page As Integer
    Dim PageDocument As IHTMLDocument
    Dim ResultsHost As Object
    Dim BodyRows As Object
    Dim RecordRow As IHTMLElementCollection
    Dim Sheet As Worksheet
    Dim oRow As Long
    Dim Record As Long
    Dim Col As Long
    Dim ErrNumber As Long
    Dim ErrSource As String
    Dim ErrDescription As String

    Set Sheet = ThisWorkbook.Worksheets("Sheet1") ' output sheet
    If Browser Is Nothing Then
        Set Browser = New InternetExplorer
    End If

    On Error GoTo Failed

    oRow = 2 ' begin output at row 2 (account for header)
    For Page = 1 To MaxPage
        LoadPage Page

        ' Resolve the table rows once per page instead of once per record.
        Set PageDocument = Browser.Document
        Set ResultsHost = PageDocument.getElementById("toplists")
        If ResultsHost Is Nothing Then
            Err.Raise 91, "Start", "Element with id ""toplists"" was not found on page " & CStr(Page) & "."
        End If
        Set BodyRows = ResultsHost.getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).getElementsByTagName("tr")

        ' Iterate over the rows that actually exist; a page (typically the
        ' last one) may hold fewer than 100 records.
        For Record = 0 To BodyRows.Length - 1
            Set RecordRow = BodyRows(Record).getElementsByTagName("td")
            If RecordRow.Length >= 6 Then ' skip rows without the six data cells
                For Col = 0 To 5
                    Sheet.Cells(oRow, Col + 1).Value = Trim(RecordRow(Col).innerText)
                Next Col
                oRow = oRow + 1
            End If
        Next Record
    Next Page

Finish:
    ' Best-effort clean-up: the window may already have been closed by hand.
    On Error Resume Next
    Browser.Quit
    Set Browser = Nothing ' a quit instance cannot be reused on the next run
    On Error GoTo 0
    If ErrNumber <> 0 Then Err.Raise ErrNumber, ErrSource, ErrDescription
    Exit Sub

Failed:
    ' Remember the error, leave handler mode, then clean up and re-raise.
    ErrNumber = Err.Number
    ErrSource = Err.Source
    ErrDescription = Err.Description
    Resume Finish
End Sub

Sub LoadPage(ByVal PageNumber As Integer)
    ' Navigates the shared Browser to the given rankings page and blocks
    ' (while pumping events) until the browser reports the load is complete.
    ' Progress is logged to the Immediate window.
    Const ReadyStateComplete As Long = 4
    Dim pageUrl As String

    Debug.Print "Navigating to Page #" & CStr(PageNumber)

    pageUrl = "https://www.worldathletics.org/world-rankings/100m/men?page=" & CStr(PageNumber)
    Browser.navigate pageUrl

    ' Spin until the browser is idle and the document is fully loaded.
    Do While Browser.Busy Or Browser.readyState <> ReadyStateComplete
        DoEvents
    Loop

    Debug.Print "Navigation Complete"
End Sub
© www.soinside.com 2019 - 2024. All rights reserved.