今天凌晨我的代码还运行得很好,但我醒来之后它就无法运行了。我没有改动任何一行代码,也检查过 Firefox 是否更新过——并没有。我已经想不出可能是什么原因;我一直在阅读 urllib 文档,但没有找到任何相关信息。
from asyncio.windows_events import NULL
from ctypes.wintypes import PINT
from logging import root
from socket import timeout
from string import whitespace
from tkinter import N
from turtle import color
from urllib.request import Request
from hyperlink import URL
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
#from webdriver_manager.firefox import GeckoDriverManager
import time
from datetime import datetime
import telebot
#driver = webdriver.Firefox(service=Service(GeckoDriverManager().install()))
# Placeholder until the scraped rolls are converted to colour names later in
# the script. None is used instead of the Windows-only asyncio NULL constant.
colors = None
api = "******"
url = "https://blaze.com/pt/games/double"
bot = telebot.TeleBot(api)
chat_id = "*****"
firefox_driver_path = "/Users/Antônio/Desktop/roletarobo/geckodriver.exe"

# Run Firefox without a visible window.
firefox_options = Options()
firefox_options.add_argument("--headless")

# Selenium 4 expects the geckodriver location on a Service object; the
# `executable_path=` keyword on the driver itself is deprecated. The instance
# gets its own name so the imported `webdriver` module is not shadowed.
browser = webdriver.Firefox(
    service=Service(executable_path=firefox_driver_path),
    options=firefox_options,
)

# Using the driver as a context manager ends the session when the block exits,
# so nothing may talk to `driver` after this point.
with browser as driver:
    driver.get(url)
    wait = WebDriverWait(driver, 25)
    # Block until the roulette page reports itself as fully rendered.
    wait.until(
        presence_of_element_located(
            (By.CSS_SELECTOR, "div#roulette.page.complete")
        )
    )
    time.sleep(2)
    results = driver.find_elements(
        By.CSS_SELECTOR, "div#roulette-recent div.entry"
    )
    # Keep the text of the first eight entries only. The text is copied out
    # here because the elements become unusable once the session is closed.
    data = [entry.text for entry in results[:8]]
# convertElements: converts list elements into their declared replacements
def convertElements(oldlist, convert_dict):
    """Return a new list with each element of *oldlist* translated.

    Args:
        oldlist: Iterable of hashable items to translate.
        convert_dict: Mapping from an item to its replacement value.

    Returns:
        A list in the same order as *oldlist*. Items found in
        *convert_dict* are replaced by the mapped value; all other items
        are kept unchanged.
    """
    # dict.get with the element itself as the fallback leaves unknown
    # items untouched.
    return [convert_dict.get(e, e) for e in oldlist]
# end of convertElements
# Translate every scraped roll into a colour name: empty text -> white,
# '1'..'7' -> red, '8'..'14' -> black. Any other text is passed through
# unchanged by convertElements.
colors = convertElements(
    data,
    {
        '': "white",
        **dict.fromkeys(map(str, range(1, 8)), "red"),
        **dict.fromkeys(map(str, range(8, 15)), "black"),
    },
)
print(colors)
它工作得很好,从周日开始我就一直在编码,而且它一直在工作
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\support\wait.py", line 78, in until
value = method(self._driver)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\support\expected_conditions.py", line 64, in _predicate
return driver.find_element(*locator)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 1248, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 423, in execute
response = self.command_executor.execute(driver_command, params)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 333, in execute
return self._request(command_info[0], url, body=data)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 355, in _request
resp = self._conn.request(method, url, body=body, headers=headers)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\request.py", line 78, in request
return self.request_encode_body(
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\request.py", line 170, in request_encode_body
return self.urlopen(method, url, **extra_kw)
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 813, in urlopen
return self.urlopen(
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 785, in urlopen
retries = retries.increment(
File "C:\Users\Antônio\AppData\Local\Programs\Python\Python310\lib\site-packages\urllib3\util\retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=59587): Max retries exceeded with url: /session/b38be2fe-6d92-464f-a096-c43183aef6a8/element (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000173145EF520>: Failed to establish a new connection: [WinError 10061] No connections could be made because the target machine actively refused them'))
这个错误信息...
MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='localhost', port=59587): Max retries exceeded with url: /session/b38be2fe-6d92-464f-a096-c43183aef6a8/element (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000173145EF520>: Failed to establish a new connection: [WinError 10061] No connections could be made because the target machine actively refused them'))
...意味着 GeckoDriver 无法启动/创建新的浏览上下文,即 Firefox 会话。
此错误的根本原因可能是以下任一原因:
确保:
要逃避机器人检测,请传递参数
--disable-blink-features=AutomationControlled
,如下所示:
from selenium.webdriver.firefox.options import Options
# Fresh options object; in this snippet Options is the Firefox Options class.
options = Options()
# NOTE(review): "blink-features" is a Blink (Chromium) engine switch —
# confirm that Firefox/GeckoDriver actually honours this argument.
options.add_argument('--disable-blink-features=AutomationControlled')
始终在 tearDown(){} 方法中调用 driver.quit(),以优雅地关闭并销毁 WebDriver 和 Web Client 实例。
使用 WebDriverWait,使执行速度很快的 WebDriver 与浏览上下文保持同步。
我在实现这样一种情形时遇到了同样的问题:如果驱动程序因某种原因失败(例如在查找某个 HTML 元素时),就需要重新启动它。所以我只是重新声明了驱动程序配置(如下所示):
def _new_driver():
    """Create a Chrome driver from the shared `service` and `options`."""
    return webdriver.Chrome(
        service=service,
        options=options
    )


# Pre-bind the name so the handler can tell whether a driver was ever created.
driver = None
try:
    ## driver config
    driver = _new_driver()
    run_script(driver)
except Exception:
    # Catch Exception rather than using a bare except, so Ctrl+C and
    # SystemExit still stop the scraper instead of triggering a relaunch.
    # if the driver fails to load an element, then quit the driver
    if driver is not None:
        driver.quit()
    print("\nScrapper stopped, launching again in 4 seconds...")
    time.sleep(4)
    ## driver config
    driver = _new_driver()
    time.sleep(3)
    # Single retry only: a failure here propagates to the caller.
    run_script(driver)
我的情况是:我把多个网址放在一个字典里传入,逐个抓取多个网页,也遇到了同样的错误。我的解决办法只是不退出驱动程序,而是等待下一次迭代开始。