from bs4 import BeautifulSoup
import requests
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from getPaginationNumber import getPaginationNumber
from getDataProducts import getDataProducts
# URL of the webpage to scrape.
url = 'https://www.iris.ma/155-serveur'

# Selenium alternative, kept for reference: it opens the page in a headless
# Firefox and reads the page source after dynamic content is loaded.
# NOTE: with Selenium, `page_source` is a string, so it would be passed to
# BeautifulSoup directly instead of via `.content`.
# firefox_options = Options()
# firefox_options.add_argument("--headless")  # Runs Firefox in headless mode
# driver = webdriver.Firefox(options=firefox_options)
# driver.get(url)
# page_source = driver.page_source

# Fetch the page with a plain HTTP request. requests has no default timeout,
# so one is set explicitly to keep the script from hanging forever on an
# unresponsive server.
page_source = requests.get(url, timeout=30)

# Fail loudly on 4xx/5xx responses instead of silently parsing an error page
# and reporting a misleading pagination number.
page_source.raise_for_status()

# Parse the raw response bytes and extract the pagination count.
soup = BeautifulSoup(page_source.content, 'lxml')
paginationNumber = getPaginationNumber(soup)
print(paginationNumber)
我目前使用 Selenium 来获取页面源代码,但我想改用 requests,因为它比 Selenium 快。问题是 requests 返回的内容里只有 CSS 和 JS,而我需要的是页面的 HTML。
网页的 HTML 源代码
您使用 lxml 解析器有什么具体原因吗?
`html.parser` 应该也可以正常工作。
使用 requests 获取 HTML 文件本身是没有问题的。
# Build the parse tree from the raw response body using the lxml parser.
soup = BeautifulSoup(markup=page_source.content, features='lxml')