我正在尝试抓取一个网站。我正在使用 Selenium WebDriver,但访问被拒绝。我尝试了以下方法:
这是我的代码:
import time
from selenium import webdriver
from time import sleep
from csv import writer
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ActionChains
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import uuid
import requests
import html5lib
import undetected_chromedriver as uc
# Earlier attempt using plain Selenium with a custom user agent and
# automation switches disabled. Kept for reference; the script currently
# creates the browser through undetected_chromedriver instead.
# options = Options()
# user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
# options.add_argument("user-agent={0}".format(user_agent))
# # options.add_argument("--headless")
# options.add_argument("--disable-gpu")
# options.add_argument("--no-sandbox")
# options.add_experimental_option("excludeSwitches", ["enable-automation"])
# options.add_experimental_option("useAutomationExtension", False)
driver = uc.Chrome()
# driver = webdriver.Chrome(options=options)
# wait = WebDriverWait(driver, 20)
# action = ActionChains(driver)

# Search URL prefix; the page number and the fixed query string are appended
# for every request in the loop below.
base_url = "https://www.arrow.com/en/products/search?page="
# Absolute XPath that is not used in the visible part of the script
# (presumably the results table body -- confirm against the page markup).
base_xpath = "/html/body/div[1]/div[12]/div[2]/div/div[3]/div/div[1]/table/tbody"

try:
    # Visit search result pages 1 through 89 (the upper bound is exclusive).
    for page_no in range(1, 90):
        print(page_no)
        url = f"{base_url}{page_no}&q=Computer%20on%20Module&r=true"
        print(url)
        driver.get(url)
        # Fixed pause after each navigation before requesting the next page.
        sleep(5)
finally:
    # Always close the browser, even if a navigation raises, so that no
    # Chrome/chromedriver process is left running.
    driver.quit()
谢谢!
尝试使用此链接:
# Suggested replacement for base_url: the products URL without the
# "/search?page=" suffix used in the script above.
base_url = "https://www.arrow.com/en/products"