我对Golang的了解有限,请多包涵。我正在阅读Google Places API包装库的文档,但它仅支持附近搜索(Nearby Search)。不过这并不是主要的瓶颈。
我的脚本先通过附近搜索找到该区域内各商家的place_id,然后对每个place_id发起地点详情(Place Details)查询,以获取该Google Maps商家条目的全部数据。
这个地点详情查询才是瓶颈所在,希望能就我编写的这个脚本得到一些反馈。
这个脚本看起来并不是多线程的,但是当我把`threadCount`从1增加到40时,示例脚本的运行时间从40秒减少到了12秒。
为了让它跑起来,我做了大量的复制粘贴和反复试验。非常感谢您的帮助。
1)为什么把线程数增加到40之后运行得更快?2)如何通过多线程进一步加快速度?
package main
import (
"sync"
"bufio"
"os"
"fmt"
"net/http"
"time"
"io/ioutil"
"strings"
"log"
"crypto/tls"
"googlemaps.github.io/maps"
"bytes"
"encoding/json"
)
// Package-level configuration shared by the functions in this file.
var (
	// threadCount sizes the HTTP transport's idle keep-alive connection pool.
	// Despite its name, nothing in the visible code starts goroutines with it.
	threadCount = 40

	// wg is not referenced by the visible code; it is kept so the declaration
	// (and the sync import it relies on) stays available to the rest of the file.
	wg sync.WaitGroup

	// api_key is the Google API key appended to every request URL.
	api_key = "api_key"

	// top_cities_gps is the path of the input file read line by line by GoGoogle.
	top_cities_gps = "./top_cities_gps"

	// next_page_token is not referenced by the visible code.
	next_page_token = ""

	// business_types lists the keywords searched for at every location.
	business_types = []string{"accounting", "art_gallery"}
)

// connector is the transport behind client.
//
// Every request built in this file targets maps.googleapis.com, so the
// per-host idle limit is raised together with the global one; net/http
// otherwise keeps only DefaultMaxIdleConnsPerHost (2) idle connections per
// host regardless of MaxIdleConns.
//
// Certificate verification is left enabled: the API key travels in the URL
// of every request, so skipping verification would expose it to anyone able
// to intercept the connection.
var connector = &http.Transport{
	MaxIdleConns:        threadCount,
	MaxIdleConnsPerHost: threadCount,
	IdleConnTimeout:     5 * time.Second,
	DisableCompression:  true,
	TLSClientConfig:     &tls.Config{MinVersion: tls.VersionTLS12},
}

// client is the single HTTP client reused for all API calls. Each request,
// including reading the response body, is limited to three seconds.
var client = &http.Client{
	Transport: connector,
	Timeout:   3 * time.Second,
}
// GooglePlaces is the shape searchPlaces decodes a nearbysearch JSON response
// into. Field names map to JSON keys through the struct tags. Within this
// file only NextPageToken and Results[i].PlaceID are read after decoding.
type GooglePlaces struct {
	HTMLAttributions []interface{} `json:"html_attributions"`
	// NextPageToken is empty when the response carries no further page;
	// searchPlaces uses that as its stop condition.
	NextPageToken string `json:"next_page_token"`
	// Results holds one entry per place in the response.
	Results []struct {
		Geometry struct {
			Location struct {
				Lat float64 `json:"lat"`
				Lng float64 `json:"lng"`
			} `json:"location"`
			Viewport struct {
				Northeast struct {
					Lat float64 `json:"lat"`
					Lng float64 `json:"lng"`
				} `json:"northeast"`
				Southwest struct {
					Lat float64 `json:"lat"`
					Lng float64 `json:"lng"`
				} `json:"southwest"`
			} `json:"viewport"`
		} `json:"geometry"`
		Icon string `json:"icon"`
		ID string `json:"id"`
		Name string `json:"name"`
		OpeningHours struct {
			OpenNow bool `json:"open_now"`
			WeekdayText []interface{} `json:"weekday_text"`
		} `json:"opening_hours,omitempty"`
		Photos []struct {
			Height int `json:"height"`
			HTMLAttributions []string `json:"html_attributions"`
			PhotoReference string `json:"photo_reference"`
			Width int `json:"width"`
		} `json:"photos,omitempty"`
		// PlaceID is the identifier passed on to placeDetails.
		PlaceID string `json:"place_id"`
		Reference string `json:"reference"`
		Scope string `json:"scope"`
		Types []string `json:"types"`
		Vicinity string `json:"vicinity"`
		Rating float64 `json:"rating,omitempty"`
	} `json:"results"`
	// Status is decoded from the response but not inspected in this file.
	Status string `json:"status"`
}
func placeDetails(place_id string) {
var placeBuffer bytes.Buffer
placeBuffer.WriteString("https://maps.googleapis.com/maps/api/place/details/json?place_id=")
placeBuffer.WriteString(place_id)
placeBuffer.WriteString("&fields=name,website,formatted_phone_number,formatted_address,rating,url&key=")
placeBuffer.WriteString(api_key)
detailsQuery := placeBuffer.String()
resp, err := client.Get(detailsQuery)
if err != nil {
fmt.Println("[?] Error connecting to ", err, " with ", detailsQuery)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
}
fmt.Println(string(body))
}
func searchPlaces(page string, keyword2 string, latLong2 string) {
apiKey := api_key
keyword := keyword2
latLong := latLong2
pageToken := page
var buffer bytes.Buffer
buffer.WriteString("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=")
buffer.WriteString(latLong)
buffer.WriteString("&radius=50000&keyword=")
buffer.WriteString(keyword)
buffer.WriteString("&key=")
buffer.WriteString(apiKey)
buffer.WriteString("&pagetoken=")
buffer.WriteString(pageToken)
query := buffer.String()
resp, err := client.Get(query)
if err != nil {
fmt.Println("[?] Error connecting to ", err, " with ", query)
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
}
res := GooglePlaces{}
json.Unmarshal([]byte(body), &res)
for i := 0; i < len(res.Results); i++ {
placeDetails(res.Results[i].PlaceID)
// listings.WriteString("\n")
}
if err != nil {
log.Fatal(err)
}
// recursively search
if res.NextPageToken == "" {
return
} else {
searchPlaces(res.NextPageToken, keyword, latLong)
}
}
// GoGoogle reads the file named by top_cities_gps line by line and, for every
// non-blank line, runs searchPlaces once per entry in business_types. Each
// trimmed line is used as the search location (presumably "lat,lng"; confirm
// against the input file).
//
// Failing to open or read the file, or to construct the maps client, ends
// the program via log.Fatalf.
func GoGoogle() {
	f, err := os.Open(top_cities_gps)
	if err != nil {
		log.Fatalf("open file error: %v", err)
	}
	defer f.Close()

	// NOTE(review): c is not used for any request below; searchPlaces goes
	// through the package-level HTTP client. The call is kept as in the
	// original so the maps import stays in use.
	c, err := maps.NewClient(maps.WithAPIKey(api_key), maps.WithRateLimit(1000))
	if err != nil {
		log.Fatalf("fatal errors: %s on %s", err, c)
	}

	// A Scanner ends cleanly at end of file and still yields a final line that
	// has no trailing newline; ReadString reported io.EOF as a fatal error.
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		latLong := strings.TrimSpace(sc.Text())
		if latLong == "" {
			// A blank line would otherwise search with an empty location.
			continue
		}
		// Nearby search for every business type at this location, e.g.
		// https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=-33.8670522,151.1957362&rankby=distance&type=food&key=YOUR_API_KEY
		for _, s := range business_types {
			searchPlaces("", s, latLong)
		}
	}
	if err := sc.Err(); err != nil {
		log.Fatalf("read file line error: %v", err)
	}
}
// main is the program entry point; it runs the whole scrape by calling GoGoogle.
func main() {
	GoGoogle()
}
`threadCount` 这个名字具有误导性:它仅用于设置HTTP传输(`http.Transport`)中的 `MaxIdleConns`。根据文档:
// MaxIdleConns controls the maximum number of idle (keep-alive)
// connections across all hosts. Zero means no limit.
因此,当您将 `threadCount` 从1增加到40时,只是提高了空闲保持活动(keep-alive)连接数量的上限。从这种用法来看,将 `threadCount` 设置为0(即不限制)可能会为您带来最佳效果。
我建议您完全去掉 `threadCount`。