尝试使用selenium抓取佳能网页

问题描述 投票:0回答:1

其中一个类别页面上有一个“加载更多”按钮,点击后会加载更多相机。我需要编写一个函数,先滚动到“加载更多”按钮,然后单击它。网址:https://www.usa.canon.com/shop/cameras/mirrorless-cameras

def find_load_more(driver, xpath, timeout=10):
    """Wait for the "load more" button and return it once it is clickable.

    Args:
        driver: Selenium WebDriver instance handed to ``WebDriverWait``.
        xpath: XPath expression that locates the button.
        timeout: Maximum number of seconds to wait. Defaults to 10, the
            value that used to be hard-coded, so existing callers are
            unaffected.

    Returns:
        The element produced by the wait, or ``None`` when the wait timed
        out (a message is printed in that case).
    """
    try:
        return WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        )
    except TimeoutException:
        print("Timeout waiting for the load more button.")
        return None


def scroll_to_load_more(driver, xpath):
    """Scroll to the "load more" button located by *xpath* and click it.

    Args:
        driver: Selenium WebDriver instance driving the page.
        xpath: XPath expression that locates the button.

    Returns:
        ``True`` if a click was issued, ``False`` if the button was not
        found or disappeared before it could be clicked.
    """
    # Function-scope import so this block does not depend on the file's
    # import section also listing these names.
    from selenium.common.exceptions import (
        ElementClickInterceptedException,
        NoSuchElementException,
        StaleElementReferenceException,
    )

    print(xpath)
    # Scroll most of the way down before waiting. NOTE(review): the reported
    # symptom is a timeout while waiting for the button, and scrolling first
    # is the suggested remedy -- presumably the button only becomes clickable
    # after the page has been scrolled; confirm against the live page.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight * 0.7);")

    # find_load_more() already converts a timeout into None, so the missing
    # button is handled here rather than through an exception handler.
    load_more_button = find_load_more(driver, xpath)
    if load_more_button is None:
        print("Load more button not found.")
        return False

    try:
        # Centre the button instead of aligning it with the top edge of the
        # viewport, where it is more likely to sit under other page content.
        driver.execute_script(
            "arguments[0].scrollIntoView({block: 'center'});", load_more_button
        )
        try:
            load_more_button.click()
        except ElementClickInterceptedException:
            # Another element received the click; dispatch it from JavaScript.
            driver.execute_script("arguments[0].click();", load_more_button)
    except (NoSuchElementException, StaleElementReferenceException):
        # The element was removed or re-rendered between lookup and click.
        print("The 'Load More' button does not exist.")
        return False
    return True


def scrape_canon_preview(category, driver):
    """Open a Canon USA camera category page and click its "load more" button.

    Args:
        category: Path segment appended to ``/shop/cameras/`` in the URL,
            e.g. ``"mirrorless-cameras"``.
        driver: Selenium WebDriver used to load and drive the page.
    """
    url = f"https://www.usa.canon.com/shop/cameras/{category}"
    driver.get(url)
    # wait_for_page_load is defined outside this snippet.
    wait_for_page_load(driver)
    # Click "load more". NOTE(review): this issues a single click; if one
    # click does not reveal every camera the call has to be repeated.
    #
    # The button is matched on the `amscroll-load-button` class *token*
    # rather than on the exact attribute string, so additional or reordered
    # classes on the button do not break the lookup.
    scroll_to_load_more(
        driver,
        "//button[contains(concat(' ', normalize-space(@class), ' '), "
        "' amscroll-load-button ')]",
    )

但是运行后它只打印出 "Timeout waiting for the load more button."(等待“加载更多”按钮超时)和 "Load more button not found."(找不到“加载更多”按钮)。

python parsing selenium-webdriver web-scraping automation
1个回答
0
投票

应该先把页面滚动到一定高度(下面的代码滚动到页面高度的 70%),然后再等待并单击“加载更多”按钮。

import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Script: open the mirrorless-camera listing and keep clicking "LOAD MORE"
# until the button is gone or no longer offers more items.
url = "https://www.usa.canon.com/shop/cameras/mirrorless-cameras"
pagination_locator = ".amscroll-load-button"
# Stop retrying after this many consecutive failed click attempts, so a
# persistent error cannot spin the loop forever.
max_failed_attempts = 5

driver = webdriver.Chrome()
try:
    driver.get(url)
    # Scroll to 70% of the page height before looking for the button.
    scroll_height = driver.execute_script("return document.body.scrollHeight")
    scroll_position = scroll_height * 0.7
    driver.execute_script(f"window.scrollTo(0, {scroll_position});")

    failed_attempts = 0
    while failed_attempts < max_failed_attempts:
        try:
            # Check if the pagination element is present.
            pagination_element = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, pagination_locator)
                )
            )
        except TimeoutException:
            # No button in the DOM within the timeout: nothing left to load.
            # Previously this was swallowed and the wait restarted forever.
            print("Pagination element not found; stopping.")
            break

        print("Pagination element found!")
        time.sleep(5)
        try:
            if pagination_element.text != "LOAD MORE":
                # The button no longer offers more items: scroll once more
                # to 70% of the (now taller) page and finish.
                scroll_height = driver.execute_script(
                    "return document.body.scrollHeight"
                )
                scroll_position = scroll_height * 0.7
                driver.execute_script(f"window.scrollTo(0, {scroll_position});")
                break
            pagination_element.click()
            failed_attempts = 0
        except WebDriverException as exc:
            # Best effort, as before: a stale or intercepted element is
            # retried on the next pass, but only a bounded number of times.
            failed_attempts += 1
            print(f"Attempt {failed_attempts} failed: {exc}")
finally:
    # Close the WebDriver even if something above raised.
    driver.quit()
© www.soinside.com 2019 - 2024. All rights reserved.