由于等待评论加载而导致网页抓取超时

问题描述 投票:0回答:1

我想编写一个 Python 脚本,用来读取 Google 地图上某个特定地点/商店的所有评论。我对代码做了多次修改,但始终出现超时异常。代码如下:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def scrape_google_reviews(url):
    """Collect review titles from a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120
    seconds for an element with class ``ODSEW-ShBeI-content`` to appear,
    and then reads the text of the ``.ODSEW-ShBeI-title`` child of every
    such element.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one string per matched review element, or ``None`` if
        waiting for the first review element timed out or raised.

    Raises:
        Any exception raised while loading the page or reading the
        elements after the wait is propagated to the caller; the browser
        is closed first.
    """
    # Set up a headless Chrome WebDriver (no visible browser window).
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # Path to the locally downloaded chromedriver executable.
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that touches the browser runs under try/finally so the
    # Chrome process is shut down on every exit path, including unexpected
    # exceptions while loading the page or reading the elements.
    try:
        driver.get(url)

        # Wait for the reviews to load.
        # NOTE(review): this class name looks auto-generated; if the page no
        # longer uses it, the wait runs the full 120 s and times out.
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.CLASS_NAME, "ODSEW-ShBeI-content"))
            )
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        review_elements = driver.find_elements(By.CLASS_NAME, "ODSEW-ShBeI-content")

        # One entry per review container: the text of its title child.
        return [
            review_element.find_element(By.CSS_SELECTOR, ".ODSEW-ShBeI-title").text
            for review_element in review_elements
        ]
    finally:
        driver.quit()

# Demo run: scrape a single place page and print its reviews, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A None result (wait failed) and an empty list are both reported as failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, start=1):
        print(f"Review {i}: {review}")

我不确定哪里出了问题。我参照了几篇博客教程(包括 GeeksforGeeks 上的),但它们似乎已经过时了。我的 Chrome 版本是 122.0.6261.113,chromedriver.exe 是从这里下载的:https://storage.googleapis.com/chrome-for-testing-public/122.0.6261.128/win64/chromedriver-win64.zip

python selenium-webdriver web-scraping
1个回答
0
投票

下面是获取评论的代码。

我没有使用动态生成的类名来获取评论,而是使用了 `MyEned` 这个类名,它看起来是静态的。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

def scrape_google_reviews(url):
    """Collect review texts from a Google Maps place page.

    Starts a headless Chrome session, loads ``url``, waits up to 120
    seconds for an element with class ``MyEned`` to appear, and then reads
    the ``textContent`` attribute of every such element.

    Args:
        url: Address of the Google Maps place page to load.

    Returns:
        A list with one string per matched review element, or ``None`` if
        waiting for the first review element timed out or raised.

    Raises:
        Any exception raised while loading the page or reading the
        elements after the wait is propagated to the caller; the browser
        is closed first.
    """
    # Set up a headless Chrome WebDriver (no visible browser window).
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # Path to the locally downloaded chromedriver executable.
    chromedriver_path = 'C:/Users/Downloads/chromedriver-win64/chromedriver.exe'
    service = Service(chromedriver_path)
    driver = webdriver.Chrome(service=service, options=chrome_options)

    # Everything that touches the browser runs under try/finally so the
    # Chrome process is shut down on every exit path, including unexpected
    # exceptions while loading the page or reading the elements.
    try:
        driver.get(url)

        # Wait for the reviews to load.
        try:
            WebDriverWait(driver, 120).until(
                EC.presence_of_element_located((By.CLASS_NAME, "MyEned"))
            )
        except TimeoutException as e:
            print("Timeout occurred while waiting for reviews to load:", e)
            return None
        except Exception as e:
            print("An error occurred while waiting for reviews to load:", e)
            return None

        review_elements = driver.find_elements(By.CLASS_NAME, "MyEned")
        # Debug output: dumps the raw WebElement objects that were matched.
        print("review_elemtns", review_elements)

        # textContent is read from the DOM attribute rather than via `.text`;
        # presumably so text that is not currently rendered is included too.
        return [
            review_element.get_attribute("textContent")
            for review_element in review_elements
        ]
    finally:
        driver.quit()

# Demo run: scrape a single place page and print its reviews, numbered from 1.
url = "https://www.google.com/maps/place/FASTECH+SOLUTIONS/@18.5165309,73.8457059,18.29z/data=!4m6!3m5!1s0x3bc2c160b5caf2dd:0x6d49235d88bd5d25!8m2!3d18.5161858!4d73.8459712!16s%2Fg%2F11t7drcv4g?entry=ttu"
reviews = scrape_google_reviews(url)
# A None result (wait failed) and an empty list are both reported as failure.
if not reviews:
    print("Failed to scrape reviews.")
else:
    for i, review in enumerate(reviews, start=1):
        print(f"Review {i}: {review}")

这是输出的屏幕截图。

© www.soinside.com 2019 - 2024. All rights reserved.