我正在使用 Selenium 创建一个 Python 网络抓取程序,但我遇到了以下错误:Stacktrace

问题描述 投票:0回答:0

我正在制作一个程序,用 Selenium 在谷歌上抓取照片。 但我有一个问题。 我的代码在这里。

# Import modules
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

import os

# HTTP headers sent with every image download; the User-Agent string
# identifies the request as coming from a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.114 Safari/537.36'
    ),
}

# scroll down func
def scroll_down(driver, pause=2, max_scrolls=None):
    """Scroll to the bottom of the page until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(js)``, e.g. a Selenium
            WebDriver.
        pause: Seconds to sleep after each scroll so that lazily loaded
            content has time to be appended. Defaults to 2, the value that
            was previously hard-coded.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original unbounded behaviour; pass an
            integer to guarantee termination on pages that keep growing.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls = 0

    while max_scrolls is None or scrolls < max_scrolls:
        # Jump to the current bottom of the document.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls += 1

        # Give the page time to load more content.
        time.sleep(pause)

        new_height = driver.execute_script("return document.body.scrollHeight")

        # An unchanged height means nothing new was appended: we are done.
        if new_height == last_height:
            break

        last_height = new_height


# Search terms (Korean singer names). They are typed into the search box
# verbatim, so the strings must stay exactly as written.
tofu = ["뉴진스민지셀카"]  # other candidates: Girl's Day Minah, Kazuha, NewJeans Hanni
arab = ['뉴진스 해린 셀카', '뉴진스 다니엘 셀카', '뉴진스 혜인 셀카', '뉴진스 민지 셀카', '블랙핑크 제니 셀카', '블랙핑크 리사', '블랙핑크 지수', '블랙핑크 로제', '레드벨벳 조이', '레드벨벳 아이린', '레드벨벳 웬디']

# Set up the WebDriver Manager
service = ChromeService(executable_path=ChromeDriverManager().install())
driver = webdriver.Chrome(service=service)

# Everything that uses the browser runs inside try/finally so the browser
# process is always shut down, even when a search or lookup raises.
try:
    driver.maximize_window()

    # Navigate to an image-search results page to get a search box to reuse.
    url = "https://www.google.com/search?q=%EC%86%A1%EC%A4%91%EA%B8%B0&tbm=isch&ved=2ahUKEwjKkpiAiPL9AhVItlYBHapiAsUQ2-cCegQIABAA&oq=%EC%86%A1%EC%A4%91%EA%B8%B0&gs_lcp=CgNpbWcQAzIICAAQgAQQsQMyBQgAEIAEMgUIABCABDIFCAAQgAQyBQgAEIAEMgUIABCABDIFCAAQgAQyBQgAEIAEMgUIABCABDIFCAAQgAQ6BAgAEAM6CAgAELEDEIMBOgsIABCABBCxAxCDAToECCMQJ1DYBFjXCGDMCWgAcAB4AIABcYgBsAeSAQMxLjiYAQCgAQGqAQtnd3Mtd2l6LWltZ8ABAQ&sclient=img&ei=XkccZMrrL8js2roPqsWJqAw&bih=328&biw=1536"
    driver.get(url)

    # The destination folder is the same for every search term: create it once.
    save_folder = 'C:/Users/parkdongkyu/Documents/photos/arab'
    os.makedirs(save_folder, exist_ok=True)

    for name in arab:
        # Type the singer name into the search bar and submit.
        search_bar = driver.find_element(By.CSS_SELECTOR, '[name="q"]')
        search_bar.clear()
        search_bar.send_keys(name, Keys.ENTER)

        # Scroll to the bottom so that more thumbnails are loaded.
        scroll_down(driver)

        time.sleep(3)

        # Collect the thumbnail elements on the results page.
        elements = driver.find_elements(By.CSS_SELECTOR, 'img.rg_i.Q4LuWd')

        for index, element in enumerate(elements):
            try:
                # Click the thumbnail to open the enlarged preview.
                element.click()

                # Wait up to 10 s for the enlarged image to become clickable.
                big_photo = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, 'img.n3VNCb.pT0Scc.KAlRDb')))
                img_url = big_photo.get_attribute('src')

                if not img_url:
                    print(f"No image URL found for element {index}")
                    continue

                # Download BEFORE opening the output file so a failed request
                # does not leave an empty .jpg behind; fail on HTTP errors and
                # never hang forever on an unresponsive server.
                res = requests.get(img_url, headers=headers, timeout=10)
                res.raise_for_status()

                file_name = f"{name}_image_{index}.jpg"
                save_path = os.path.join(save_folder, file_name)
                with open(save_path, 'wb') as f:
                    f.write(res.content)

                print(f"Image saved: {save_path}")
            except Exception as e:
                # Best-effort scraping: report and move on to the next image.
                # The exception class is printed because some exceptions
                # (e.g. the wait timing out) carry an empty message.
                print(f"Error processing element {index}: {type(e).__name__}: {e}")
finally:
    driver.quit()

运行这段代码时,出现了如下错误。

Error processing element 57: Message: 
Stacktrace:
Backtrace:
        (No symbol) [0x003ADCE3]
        (No symbol) [0x003439D1]
        (No symbol) [0x00254DA8]
        (No symbol) [0x0028019F]
        (No symbol) [0x002803AB]
        (No symbol) [0x002AEE62]
        (No symbol) [0x0029AF14]
        (No symbol) [0x002AD57C]
        (No symbol) [0x0029ACC6]
        (No symbol) [0x00276F68]
        (No symbol) [0x002780CD]
        GetHandleVerifier [0x00623832+2506274]
        GetHandleVerifier [0x00659794+2727300]
        GetHandleVerifier [0x0065E36C+2746716]
        GetHandleVerifier [0x00456690+617600]
        (No symbol) [0x0034C712]
        (No symbol) [0x00351FF8]
        (No symbol) [0x003520DB]
        (No symbol) [0x0035C63B]
        BaseThreadInitThunk [0x74EC7D69+25]
        RtlInitializeExceptionChain [0x76F8B74B+107]
        RtlClearBits [0x76F8B6CF+191]

此错误的原因可能是什么? 我的 Google Chrome 浏览器版本是 111.0.5563.111。

为了修复此错误,我尝试过把 Chrome 浏览器换成 Edge(EdgeManager),因为我认为问题出在 Chrome 浏览器的版本冲突上。 但是没有效果。

python selenium-webdriver web-scraping python-requests screen-scraping
© www.soinside.com 2019 - 2024. All rights reserved.