无法让脚本异步等待特定时间以使标题解析,然后再寻找下一个URL

问题描述 投票:0回答:1

我正在尝试使用vbaServerXMLHTTP60中创建脚本,以从某些相同的链接中解析第一篇文章的标题。我在这里的主要目标是使脚本异步进行,同时设置最长的时间,直到尝试下一个URL为止,脚本将尝试该时间。

但是,我创建的宏总是在出现超时而无法从链接中删除标题的情况下使用下一个URL。

Sub FetchContentWithinSpecificTime()
    Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
    Dim URL As Variant, Urllist As Variant, t As Date, sResp As Boolean

    Urllist = Array( _
        "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=1", _
        "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=2", _
        "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=3", _
        "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=4", _
        "https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=5" _
    )

    For Each URL In Urllist
        Debug.Print "trying with: " & URL
        With oHttp
            .Open "GET", URL, True
            .setRequestHeader "User-Agent", "Mozilla/5.0"
            .setTimeouts 5000, 5000, 15000, 15000
            .send
            t = Now + TimeValue("00:00:10")
            sResp = False

            On Error Resume Next
            Do
                If .readyState = 4 Then sResp = True: Exit Do
                If Now > t Then sResp = False: Exit Do
                DoEvents
            Loop
            On Error GoTo 0

            If sResp Then
                HTML.body.innerHTML = .responseText
                Debug.Print HTML.querySelector(".question-hyperlink").innerText
            Else:
                Debug.Print "failed with: " & URL
            End If
        End With
    Next URL
End Sub

在下一个URL之前,如何使脚本等待一段时间以解析标题?

vba asynchronous web-scraping settimeout serverxmlhttp
1个回答
0
投票
Sub FetchContentWithinSpecificTime() Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument Dim URL As Variant, Urllist As Variant, t As Date Dim sPrice$, sResp As Boolean Urllist = Array( _ "https://finance.yahoo.com/quote/NZDUSD=X?p=NZDUSD=X", _ "https://finance.yahoo.com/quote/FB?p=FB", _ "https://finance.yahoo.com/quote/AAPL?p=AAPL", _ "https://finance.yahoo.com/quote/IBM?p=IBM", _ "https://finance.yahoo.com/quote/UCO?p=UCO" _ ) For Each URL In Urllist Debug.Print "trying with: " & URL With oHttp .Open "GET", URL, True .setRequestHeader "User-Agent", "Mozilla/5.0" .send t = Now + TimeValue("00:00:10") sResp = False Do While .readyState < 4 If .readyState = 4 Then Exit Do sResp = (Now > t) Or (Err.Number <> 0) If sResp Then Exit Do DoEvents Loop If Not sResp Then HTML.body.innerHTML = .responseText sPrice = HTML.querySelector(".Mb\(-4px\)").innerText Debug.Print sPrice Else: Debug.Print "failed with: " & URL End If End With Next URL End Sub
© www.soinside.com 2019 - 2024. All rights reserved.