重构Golang脚本以实现多线程HTTP客户端请求

问题描述 投票:0回答:1

我对Golang的了解有限,还请见谅。我正在阅读Google地方信息(Places)Go封装库的文档,但它只支持附近搜索(Nearby Search)。不过这并不是主要的瓶颈。

我的脚本先执行附近搜索,找到该区域内各商家的place_id,然后对每个place_id执行地方详情(Place Details)查询,以获取该Google Maps商家条目的全部数据。

此地方详细信息查询是瓶颈所在,我希望可以从我编写的此脚本中获得一些反馈。

这个脚本看起来并不是多线程的,但是当我将threadCount从1增加到40时,示例脚本的运行时间从40秒减少到了12秒。

为了获得此功能,我必须进行大量复制和粘贴以及反复试验。非常感谢您的帮助。

1)当我将线程数增加到40时,为什么运行速度更快?2)如何通过多线程来加快速度?


package main

import (
    "sync"
    "bufio"
    "os"
    "fmt"
    "net/http"
    "time"
    "io/ioutil"
    "strings"
    "log"
    "crypto/tls"
    "googlemaps.github.io/maps"
    "bytes"
    "encoding/json"
)

// threadCount sizes the idle keep-alive connection pool of the shared HTTP
// transport below. Despite its name it does not start any goroutines.
var threadCount = 40

// wg is not used by the code shown in this file.
var wg sync.WaitGroup

// api_key is the Google API key appended to every request URL.
var api_key = "api_key"

// top_cities_gps is the path of the input file read by GoGoogle.
var top_cities_gps = "./top_cities_gps"

// next_page_token is not used by the code shown in this file.
var next_page_token = ""

// business_types lists the keywords searched for at every location.
var business_types = []string{"accounting", "art_gallery"}

// connector is the transport shared by every request made through client.
var connector = &http.Transport{
    MaxIdleConns: threadCount,
    // Every request targets the same host, and net/http keeps only
    // DefaultMaxIdleConnsPerHost (2) idle connections per host unless this is
    // raised, so MaxIdleConns alone would not enlarge the usable pool.
    MaxIdleConnsPerHost: threadCount,
    IdleConnTimeout:     5 * time.Second,
    DisableCompression:  true,
    // Certificate verification stays enabled: the API key travels in the URL
    // of every request, so skipping verification would expose it to any
    // man-in-the-middle.
    TLSClientConfig: &tls.Config{MinVersion: tls.VersionTLS12},
}

// client is the single HTTP client reused for all Places API calls.
var client = &http.Client{
    Transport: connector,
    Timeout:   3 * time.Second,
}
// GooglePlaces mirrors the JSON body of a Nearby Search response; searchPlaces
// decodes each result page into it.
type GooglePlaces struct {
    HTMLAttributions []interface{} `json:"html_attributions"`
    // NextPageToken is non-empty while more result pages are available;
    // searchPlaces keeps requesting pages until it comes back empty.
    NextPageToken    string        `json:"next_page_token"`
    // Results holds one entry per place on the current page.
    Results          []struct {
        Geometry struct {
            Location struct {
                Lat float64 `json:"lat"`
                Lng float64 `json:"lng"`
            } `json:"location"`
            Viewport struct {
                Northeast struct {
                    Lat float64 `json:"lat"`
                    Lng float64 `json:"lng"`
                } `json:"northeast"`
                Southwest struct {
                    Lat float64 `json:"lat"`
                    Lng float64 `json:"lng"`
                } `json:"southwest"`
            } `json:"viewport"`
        } `json:"geometry"`
        Icon         string `json:"icon"`
        ID           string `json:"id"`
        Name         string `json:"name"`
        OpeningHours struct {
            OpenNow     bool          `json:"open_now"`
            WeekdayText []interface{} `json:"weekday_text"`
        } `json:"opening_hours,omitempty"`
        Photos []struct {
            Height           int      `json:"height"`
            HTMLAttributions []string `json:"html_attributions"`
            PhotoReference   string   `json:"photo_reference"`
            Width            int      `json:"width"`
        } `json:"photos,omitempty"`
        // PlaceID is the identifier searchPlaces hands to placeDetails.
        PlaceID   string   `json:"place_id"`
        Reference string   `json:"reference"`
        Scope     string   `json:"scope"`
        Types     []string `json:"types"`
        Vicinity  string   `json:"vicinity"`
        Rating    float64  `json:"rating,omitempty"`
    } `json:"results"`
    // Status is the API-level status string returned in the response body.
    Status string `json:"status"`
}

// placeDetails requests the Place Details record for place_id and prints the
// raw JSON response body to stdout.
//
// A failed request is reported on stdout and the function returns without
// printing a body. A failure while reading the response body terminates the
// program through log.Fatal.
func placeDetails(place_id string) {
    var placeBuffer bytes.Buffer

    placeBuffer.WriteString("https://maps.googleapis.com/maps/api/place/details/json?place_id=")
    placeBuffer.WriteString(place_id)
    placeBuffer.WriteString("&fields=name,website,formatted_phone_number,formatted_address,rating,url&key=")
    placeBuffer.WriteString(api_key)

    detailsQuery := placeBuffer.String()

    resp, err := client.Get(detailsQuery)
    if err != nil {
        fmt.Println("[?] Error connecting to ", err, " with ", detailsQuery)
        // resp is nil when Get fails; return before resp.Body is touched,
        // otherwise the deferred Close below would panic.
        return
    }
    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Fatal(err)
    }

    fmt.Println(string(body))
}

func searchPlaces(page string, keyword2 string, latLong2 string) {
    apiKey := api_key
    keyword := keyword2
    latLong := latLong2
    pageToken := page
    var buffer bytes.Buffer

    buffer.WriteString("https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=")
    buffer.WriteString(latLong)
    buffer.WriteString("&radius=50000&keyword=")
    buffer.WriteString(keyword)
    buffer.WriteString("&key=")
    buffer.WriteString(apiKey)
    buffer.WriteString("&pagetoken=")
    buffer.WriteString(pageToken)

    query := buffer.String()

        resp, err := client.Get(query)

        if err != nil {
            fmt.Println("[?] Error connecting to ", err, " with ", query)
        }

    defer resp.Body.Close()

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        log.Fatal(err)
    }

    res := GooglePlaces{}
    json.Unmarshal([]byte(body), &res)

    for i := 0; i < len(res.Results); i++ {
        placeDetails(res.Results[i].PlaceID)
  //      listings.WriteString("\n")
    }
    if err != nil {
        log.Fatal(err)
    }

        // recursively search
        if res.NextPageToken == "" {
            return
        } else {
            searchPlaces(res.NextPageToken, keyword, latLong)
        }

}

// GoGoogle reads the file named by top_cities_gps line by line and, for every
// non-blank line, calls searchPlaces once per entry in business_types. Each
// line is trimmed and passed through as the search location (presumably a
// "lat,lng" pair; confirm against the input file).
//
// The function returns normally once the whole file has been processed.
// Failing to open or read the file, or to construct the maps client,
// terminates the program through log.Fatalf.
func GoGoogle() {
    f, err := os.Open(top_cities_gps)
    if err != nil {
        log.Fatalf("open file error: %v", err)
    }
    defer f.Close()

    // The maps client is only constructed, never used for requests; the call
    // is kept so that a rejected client configuration still aborts the run.
    if _, err := maps.NewClient(maps.WithAPIKey(api_key), maps.WithRateLimit(1000)); err != nil {
        log.Fatalf("fatal errors: %s", err)
    }

    // Scanner ends the loop cleanly at end of file and also yields a final
    // line that has no trailing newline.
    sc := bufio.NewScanner(f)
    for sc.Scan() {
        location := strings.TrimSpace(sc.Text())
        if location == "" {
            // A blank line would produce a search without a location.
            continue
        }

        for _, businessType := range business_types {
            searchPlaces("", businessType, location)
        }
    }

    if err := sc.Err(); err != nil {
        log.Fatalf("read file line error: %v", err)
    }
}

// main is the program entry point; it delegates all work to GoGoogle.
func main() {
    GoGoogle()
}

go google-places-api
1个回答
1
投票

threadCount这个名称有误导性。它只被用来设置HTTP传输(http.Transport)中的MaxIdleConns。根据文档:

 // MaxIdleConns controls the maximum number of idle (keep-alive)
 // connections across all hosts. Zero means no limit.

因此,当您将threadCount从1增加到40时,将增加保持活动连接的限制。从这种用法看来,将threadCount设置为0可能会为您带来最佳效果。

我建议您完全摆脱threadCount

© www.soinside.com 2019 - 2024. All rights reserved.