Why do the browser windows close so quickly?

Problem description

Sorry for my bad English. When I try to open several browser windows using Pool, they close immediately.

This started happening recently, and I also get the error below even when I only open a single page (this used to work before the error appeared):

[4972:10204:0215/233710.867:ERROR:ssl_client_socket_impl.cc(974)] handshake failed; returned -1, SSL error code 1, net_error -101 

How can I fix this error? Any help is appreciated.

Code

import requests
from bs4 import BeautifulSoup
import time
import os
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from multiprocessing import Pool


url = 'https://www.pik.ru/search/storehouse'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
}


def download_pages_objects(url):
    if os.path.isfile(r'C:\Users\kraz1\OneDrive\Рабочий стол\Антон\python\парсинг\кладовочная\pik_links.txt'):
        os.remove(
            r'C:\Users\kraz1\OneDrive\Рабочий стол\Антон\python\парсинг\кладовочная\pik_links.txt')

    list_links = []
    req = requests.get(url, headers=headers)
    soup = BeautifulSoup(req.text, "html5lib")

    for i in soup.find_all("a", class_="styles__ProjectCard-uyo9w7-0 friPgx"):
        list_links.append('https://www.pik.ru'+i.get('href')+'\n')

    list_links = list(set(list_links))

    with open(r'C:\Users\kraz1\OneDrive\Рабочий стол\Антон\python\парсинг\кладовочная\pik_links.txt', 'a') as file:
        for link in list_links:
            file.write(link)


def get_list_objects_links(url):
    download_pages_objects(url)

    list_of_links = []
    with open(r'C:\Users\kraz1\OneDrive\Рабочий стол\Антон\python\парсинг\кладовочная\pik_links.txt', 'r') as file:
        for item in file:
            list_of_links.append(item.strip())  # drop the trailing newline

    return list_of_links


def operation(url):

    # options = webdriver.ChromeOptions()
    # options.add_argument('--ignore-certificate-errors-spki-list')
    # options.add_argument('--ignore-ssl-errors')
    # options.add_argument('--ignore-certificate-error')

    driver = webdriver.Chrome()

    driver.get(url)
    time.sleep(40)  # give the page time to finish loading its dynamic content

    pahe = driver.find_elements(By.CSS_SELECTOR, '.sc-gsnTZi.fWJuXR')  # 'show more' buttons

    count = len(pahe)
    print(len(pahe))
    # keep clicking the 'Показать' (show more) buttons until none are left
    while count != 0:
        count = 0

        for item in pahe:
            if ('Показать' in item.text):
                actions = ActionChains(driver)
                actions.move_to_element(item).perform()
                print(item.text, '  ', item, '\n\n')
                time.sleep(2)
                item.click()
                time.sleep(2)
                count += 1

        pahe = driver.find_elements(By.CSS_SELECTOR, '.sc-gsnTZi.fWJuXR')
        print(len(pahe))

    response = driver.page_source

    list_refer_storehouse_on_page = []
    soup = BeautifulSoup(response, 'lxml')
    for item in (soup.find_all('div', class_='sc-htiqpR fhmJpy')):
        list_refer_storehouse_on_page.append(
            'https://www.pik.ru' + item.find('a').get('href'))
    print(list_refer_storehouse_on_page)

    driver.close()
    driver.quit()

    with open(fr'C:\Users\kraz1\OneDrive\Рабочий стол\Антон\python\парсинг\кладовочная\data_pages\{url.split("/")[-2]}.txt', 'w') as file:
        for link in list_refer_storehouse_on_page:
            file.write(f'{link}\n')


def main():
    list_links = get_list_objects_links(url)
    p = Pool(processes=1)
    # p.map(operation, list_links[0])
    operation(list_links[0])


if __name__ == '__main__':
    main()
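
For reference, when the goal is to open several browser windows in parallel, the usual pattern is to let each worker process create and quit its own driver and to map the list of links over the pool. A minimal sketch, assuming the operation function above; run_all is a hypothetical helper, not part of the original code:

from multiprocessing import Pool

def run_all(links, workers=2):
    # each worker process calls operation(), which starts and quits its own Chrome instance
    with Pool(processes=workers) as pool:
        pool.map(operation, links)

# usage, e.g. inside main():
# run_all(get_list_objects_links(url))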

Tags: python, parsing, web-scraping, screen-scraping
1 Answer

The problem was that I had installed selenium, but had not installed Chrome itself.
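
In other words, Selenium was installed with pip, but the Chrome browser was missing from the machine, so webdriver.Chrome() had nothing to launch. Once Chrome is installed, a plain webdriver.Chrome() call should work; with Selenium 4.6+ the matching chromedriver is fetched automatically by Selenium Manager. A minimal sketch to verify the setup (the extra flag mirrors the options commented out in the question and only silences certificate warnings; it is optional):

from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')  # optional: hide SSL handshake noise

driver = webdriver.Chrome(options=options)  # requires the Chrome browser to be installed
driver.get('https://www.pik.ru/search/storehouse')
print(driver.title)
driver.quit()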
