运行我的网页抓取 python 脚本时出现错误

问题描述 投票:0回答:1

我有一个 python 脚本,它根据从 Excel 工作簿中获取的值从网站下载图像。但是,我不断收到此错误:

信息:为 CPU 创建了 TensorFlow Lite XNNPACK 委托。 警告:尝试使用仅支持静态大小张量的委托和具有动态大小张量的图(张量#141 是动态大小张量)。

它以前从未出现过,代码只会运行并完成执行。但出现这个错误后,程序就突然结束了。

我已将我的代码粘贴在下面。 我不知道原因是什么,但它也不会从 Excel 工作簿中删除行。知道为什么吗?

import os
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from pynput.keyboard import Key, Controller
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from openpyxl import Workbook, load_workbook
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service

def onescript(driver):
    book = load_workbook('trial.xlsx')
    sheet1 = book.active
    print(driver.title)
    driver.get("https://www.jaquar.com/en/") #Open the web page
    
    for i in range(1,4):
        prod_id = sheet1.cell(row=i+1, column=2).value

        #Click add Product :
        search_product = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "small-searchterms"))
        )
        search_product.clear()
        search_product.send_keys(prod_id)
        search_product.send_keys(Keys.ENTER)
        book.save('trial.xlsx')

        if driver.find_elements(By.CLASS_NAME, "button-2"):
            # Click add Image :
            click_details = WebDriverWait(driver, 2).until(
                EC.presence_of_element_located((By.CLASS_NAME, "button-2"))
            )
            click_details.click()

            # Download image - Set download directory if needed( same as the one to upload )
            download_image = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, "download-image"))
            )
            download_image.click()

            product_found = True

        if not product_found:
            print("No product found")
            sheet1.delete_rows(i, 1)  # Delete the row where the product was not found
            book.save('updated_trial.xlsx')
        else:
            # Reset the index if a row was deleted
            pass
    book.save('trial.xlsx')       
        
absolute_path = os.path.dirname(__file__)
relative_path1 = "drivers\\chromedriver-win64\\chromedriver.exe"
full_path_cdriver = os.path.join(absolute_path, relative_path1)
relative_path2 = "drivers\\chrome-win64\\chrome.exe"
full_path_chrome = os.path.join(absolute_path, relative_path2)

#Upload your user-data folder so that the testing browser can know your cookies
options = Options()
options.add_argument("user-data-dir=C:/Users/xyzs/AppData/Local/Google/Chrome for Testing/User Data")
options.binary_location = full_path_chrome
service = Service(executable_path=full_path_cdriver)
driver = webdriver.Chrome(options=options, service=service)

onescript(driver)

如有任何帮助,我们将不胜感激。

python selenium-webdriver openpyxl
1个回答
0
投票

主要问题是您通过

(By.CLASS_NAME, "button-2")
找到的元素不再存在。因为您检查它是否存在,所以您的大部分代码都不会运行。我已根据当前站点和您提供的信息更新了代码。

此代码将搜索 ID,打开第一个找到的产品,然后下载其图像。我没有测试任何Excel相关代码。

import os
from openpyxl import load_workbook
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def onescript(driver):
    book = load_workbook('trial.xlsx')
    sheet1 = book.active
    driver.get("https://www.jaquar.com/en/")
    driver.maximize_window()
    wait = WebDriverWait(driver, 10)

    # close site popup
    try:
        wait.until(EC.element_to_be_clickable((By.ID, "btn_redirect_close"))).click()
    except:
        # ignore if not there
        pass

    for i in range(1,4):
        prod_id = sheet1.cell(row=i+1, column=2).value

        # search for product
        wait.until(EC.visibility_of_element_located((By.ID, "small-searchterms"))).send_keys(f"{prod_id}\n")

        try:
            # click first product
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "div.product-item a[title]"))).click()
            # download image
            wait.until(EC.presence_of_element_located((By.ID, "download-image"))).click()

        except:
            print("No product found")
            sheet1.delete_rows(i, 1)  # delete the row where the product was not found
            book.save('updated_trial.xlsx')
        
absolute_path = os.path.dirname(__file__)
relative_path1 = "drivers\\chromedriver-win64\\chromedriver.exe"
full_path_cdriver = os.path.join(absolute_path, relative_path1)
relative_path2 = "drivers\\chrome-win64\\chrome.exe"
full_path_chrome = os.path.join(absolute_path, relative_path2)

# upload your user-data folder so that the testing browser can know your cookies
options = Options()
options.add_argument("user-data-dir=C:/Users/xyzs/AppData/Local/Google/Chrome for Testing/User Data")
options.binary_location = full_path_chrome
service = Service(executable_path=full_path_cdriver)

driver = webdriver.Chrome(options=options, service=service)

onescript(driver)
© www.soinside.com 2019 - 2024. All rights reserved.