我正在尝试使用vba
在ServerXMLHTTP60
中创建脚本,以从某些相同的链接中解析第一篇文章的标题。我在这里的主要目标是使脚本异步进行,同时设置最长的时间,直到尝试下一个URL为止,脚本将尝试该时间。
但是,我创建的宏总是在出现超时而无法从链接中删除标题的情况下使用下一个URL。
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date, sResp As Boolean
Urllist = Array( _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=1", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=2", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=3", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=4", _
"https://stackoverflow.com/questions/tagged/web-scraping?tab=newest&page=5" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setTimeouts 5000, 5000, 15000, 15000
.send
t = Now + TimeValue("00:00:10")
sResp = False
On Error Resume Next
Do
If .readyState = 4 Then sResp = True: Exit Do
If Now > t Then sResp = False: Exit Do
DoEvents
Loop
On Error GoTo 0
If sResp Then
HTML.body.innerHTML = .responseText
Debug.Print HTML.querySelector(".question-hyperlink").innerText
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub
在下一个URL之前,如何使脚本等待一段时间以解析标题?
Sub FetchContentWithinSpecificTime()
Dim oHttp As New ServerXMLHTTP60, HTML As New HTMLDocument
Dim URL As Variant, Urllist As Variant, t As Date
Dim sPrice$, sResp As Boolean
Urllist = Array( _
"https://finance.yahoo.com/quote/NZDUSD=X?p=NZDUSD=X", _
"https://finance.yahoo.com/quote/FB?p=FB", _
"https://finance.yahoo.com/quote/AAPL?p=AAPL", _
"https://finance.yahoo.com/quote/IBM?p=IBM", _
"https://finance.yahoo.com/quote/UCO?p=UCO" _
)
For Each URL In Urllist
Debug.Print "trying with: " & URL
With oHttp
.Open "GET", URL, True
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
t = Now + TimeValue("00:00:10")
sResp = False
Do While .readyState < 4
If .readyState = 4 Then Exit Do
sResp = (Now > t) Or (Err.Number <> 0)
If sResp Then Exit Do
DoEvents
Loop
If Not sResp Then
HTML.body.innerHTML = .responseText
sPrice = HTML.querySelector(".Mb\(-4px\)").innerText
Debug.Print sPrice
Else:
Debug.Print "failed with: " & URL
End If
End With
Next URL
End Sub