如何使用excel vba修复和提取Google CSE JSON API结果

问题描述 投票:1回答:1

我是这里的新手。我正在尝试使用朋友提供的 VBA 代码来提取数据,但他并不擅长处理 JSON。我针对这段代码搜索过,但没有找到正确的写法。

在这里,我试图仅提取所有结果的标题,网址和描述。

https://www.googleapis.com/customsearch/v1?alt=json&cx=016252715861662448569:taxvfdziuic&num=10&start=1&key=APIKey&q=“销售经理”

'Reference for early binding: Microsoft XML v6.0

Public Sub Custom_Search_All()

    'Reads one query URL per row from column A of "Sheet2" (starting at row 2),
    'fetches the result entries for each URL via Google_CSE1, and writes one row per
    'entry (Title, Link, Summary, Updated) to "Sheet1" starting at row 4.
    'Afterwards markup is stripped from the Summary column by ReplaceTags.
    '
    'NOTE(review): Google_CSE1 selects Atom XML nodes (/a:feed/a:entry). A URL that
    'returns JSON (alt=json) will presumably yield no entries - confirm against the API.

    Dim URLsSheet As Worksheet, resultsSheet As Worksheet
    Dim lastRow As Long, r As Long
    Dim i As Long                       'was undeclared; fails to compile under Option Explicit
    Dim queryURL As String
    Dim result As Variant
    Dim lst As IXMLDOMNodeList
    Dim rownum As Long

    rownum = 4                          'first output row, directly below the header in row 3
    Set URLsSheet = ThisWorkbook.Worksheets("Sheet2")
    Set resultsSheet = ThisWorkbook.Worksheets("Sheet1")
    resultsSheet.Cells.ClearContents
    resultsSheet.Range("A3:D3").Value = Array("Title", "Link", "Summary", "Updated")

    With URLsSheet
        'Qualify Rows with the URL sheet so the row count does not depend on the active sheet
        lastRow = .Cells(.Rows.Count, "A").End(xlUp).Row
        For r = 2 To lastRow
            queryURL = Trim$(.Cells(r, "A").Value)
            'Skip blank cells instead of trying to load an empty URL
            If Len(queryURL) > 0 Then
                Set lst = Google_CSE1(queryURL)
                If Not lst Is Nothing Then
                    For i = 0 To lst.Length - 1
                        result = GetNodeValues(lst(i))
                        'Width is derived from both bounds so it does not assume a 1-based array
                        resultsSheet.Cells(rownum, "A") _
                            .Resize(1, UBound(result) - LBound(result) + 1).Value = result
                        rownum = rownum + 1
                    Next i
                End If
            End If
        Next r
    End With

    ReplaceTags resultsSheet
    'Select requires resultsSheet to be the active sheet; ReplaceTags activates it
    resultsSheet.Range("A3").Select

End Sub

Public Function GetNodeValues(node As IXMLDOMNode) As Variant

    'Extracts the fields of one result entry into a 1-based, 4-element String array:
    '   (1) text of a:title
    '   (2) href attribute of a:link
    '   (3) text of a:summary with line feeds replaced by spaces
    '   (4) a:updated converted by Cvt_ISO8601DT_Excel (stored as its String form)
    '
    'A child element (or the href attribute) that is absent leaves the corresponding
    'element as an empty string instead of raising run-time error 91, so one incomplete
    'entry no longer aborts the whole extraction.
    '
    'The "a:" prefix must be bound on the owning document via SelectionNamespaces.

    Dim results(1 To 4) As String
    Dim child As IXMLDOMNode
    Dim hrefAttr As IXMLDOMNode

    Set child = node.SelectSingleNode("a:title")
    If Not child Is Nothing Then results(1) = child.Text

    Set child = node.SelectSingleNode("a:link")
    If Not child Is Nothing Then
        Set hrefAttr = child.Attributes.getNamedItem("href")
        If Not hrefAttr Is Nothing Then results(2) = hrefAttr.Text
    End If

    Set child = node.SelectSingleNode("a:summary")
    If Not child Is Nothing Then results(3) = Replace(child.Text, vbLf, " ") 'flatten multi-line summaries

    Set child = node.SelectSingleNode("a:updated")
    If Not child Is Nothing Then
        'Only convert when a timestamp is present; an empty string would fail in the conversion
        If Len(child.Text) > 0 Then results(4) = Cvt_ISO8601DT_Excel(child.Text)
    End If

    GetNodeValues = results

End Function

Public Function Google_CSE1(queryURL As String) As IXMLDOMNodeList

    'Loads queryURL into a DOM document that is kept between calls and returns the
    'node list matching /a:feed/a:entry.
    '
    'API reference: https://developers.google.com/custom-search/json-api/v1/reference/cse/list
    'According to that reference, cse.list returns metadata about the search, metadata
    'about the custom search engine, and the search results, and needs three query parameters:
    '   cx  - the search engine to use
    '   q   - the search terms
    '   key - the API key
    '
    'The XPath expects a response whose root element looks like:
    '   < ?xml version="1.0" encoding="UTF-8"? >
    '   < feed gd:kind="customsearch#search" xmlns="http://www.w3.org/2005/Atom" xmlns:cse="http://schemas.google.com/cseapi/2010"
    '     xmlns:gd="http://schemas.google.com/g/2005" xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/" >
    'The feed's default Atom namespace is bound to the prefix "a" through SelectionNamespaces;
    'see "How To Specify Namespace when Querying the DOM with XPath" -
    'https://support.microsoft.com/en-us/help/294797
    '
    'NOTE(review): the result of Load is not checked; a response that is not Atom XML
    'presumably produces an empty list - confirm.

    Static cachedDoc As DOMDocument60

    If cachedDoc Is Nothing Then
        Set cachedDoc = New DOMDocument60
    End If

    With cachedDoc
        .async = False
        .validateOnParse = False
        .SetProperty "SelectionLanguage", "XPath"
        .SetProperty "SelectionNamespaces", "xmlns:a='http://www.w3.org/2005/Atom'"
        .Load queryURL
        Set Google_CSE1 = .SelectNodes("/a:feed/a:entry")
    End With

End Function

Private Function Cvt_ISO8601DT_Excel(dt As String) As Date

    'Converts an ISO 8601 UTC timestamp of the form yyyy-mm-ddThh:mm:ssZ into a Date.
    'Fields are read by fixed character position:
    '   yyyy-mm-ddThh:mm:ssZ
    '   12345678901234567890
    'Reference: https://en.wikipedia.org/wiki/ISO_8601#UTC
    'Only positions 1-19 are read; anything after the seconds is ignored.

    Dim datePart As Date
    Dim timePart As Date

    'year = chars 1-4, month = chars 6-7, day = chars 9-10
    datePart = DateSerial(Mid(dt, 1, 4), Mid(dt, 6, 2), Mid(dt, 9, 2))

    'hour = chars 12-13, minute = chars 15-16, second = chars 18-19
    timePart = TimeSerial(Mid(dt, 12, 2), Mid(dt, 15, 2), Mid(dt, 18, 2))

    Cvt_ISO8601DT_Excel = datePart + timePart

End Function

Sub ReplaceTags(sht As Worksheet)

    'Removes leftover markup and ellipsis fragments from column C of sht.
    'The sheet is activated and column C is selected first, so sht is the active
    'sheet and column C is the selection when this procedure returns.

    Dim fragments As Variant
    Dim fragment As Variant

    'Applied in this order: "&nbsp;..." is removed before the bare "..." so that the
    'entity is not left behind once its dots are gone.
    fragments = Array("<b>", "</b>", "<br>", "&nbsp;...", "...")

    sht.Activate
    sht.Columns("C:C").Select

    For Each fragment In fragments
        Selection.Replace What:=fragment, Replacement:="", LookAt:=xlPart, _
            SearchOrder:=xlByColumns, MatchCase:=False, SearchFormat:=False, _
            ReplaceFormat:=False
    Next fragment

End Sub

@QHarr 您的脚本无法处理冒号和双引号,遇到没有返回数据的网址时也不会跳过;另外,它也不会标明哪些网址已经完成了数据提取。

如果有人能帮助我,我将不胜感激。

json excel vba web-scraping google-custom-search
1个回答
1
投票

它返回的是 JSON,所以应当按 JSON 来解析。

您可以用 XMLHTTPRequest 请求该网址来获取 JSON 数据,然后使用 JSONConverter 等工具解析响应。将其 .bas 文件添加到项目后,您还需要转到 VBE > 工具 > 引用,添加对 Microsoft Scripting Runtime 的引用。

在 VBA 中,JSON 的 [] 对应集合(Collection),可以用 For Each 遍历或按索引访问;{} 对应字典(Dictionary),按键访问。

下面显示了访问路径以及如何高效地存储和写出结果。我假设 summary 对应的是 snippet 键;如果需要,可以轻松更改此键。

TODO:您可能需要处理没有 items 的情况、非 200 的 HTTP 响应码等。

这里假设 Sheet1 的 A 列每个单元格中有一个搜索词(例如“销售经理”“android 开发人员”),B 列是基本 URL 与经过 URL 编码的搜索词拼接后的结果。

布局:

enter image description here

A 列存放关键字(词与词之间可能需要加入 +),B 列以基本 URL 字符串开头:

https://www.googleapis.com/customsearch/v1?alt=json&cx=016252715861662448569:taxvfdziuic&num=10&start=1&key=yourAPIkey&q=

在其后拼接经过 URL 编码的搜索词:

="https://www.googleapis.com/customsearch/v1?alt=json&cx=016252715861662448569:taxvfdziuic&num=10&start=1&key=yourAPIkey&q=" & ENCODEURL(A1)

VBA:

(原回答中的 VBA 代码在本页面中未能保留。)
© www.soinside.com 2019 - 2024. All rights reserved.