这是我的代码:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import pandas as pd
from urls import URLS
# Scrape hotel names and links from one booking.com search-results page.
#
# The script opens URLS[region_index] in headless Chrome, keeps clicking the
# "load more results" button until it no longer appears within the wait
# timeout, then collects the title and link of every property card and writes
# them to hotels_<region_index>_url.csv.

# Index into URLS selecting which search-results page to scrape.
region_index = 1

# Locators for the "load more results" button, tried in order on every poll.
# The first matches the English button label. The second is a layout-based
# fallback for pages where the label is not rendered in English.
# NOTE(review): the fallback depends on the page's current DOM layout --
# verify it against the live site.
LOAD_MORE_LOCATORS = (
    (By.XPATH, '//button[contains(., "Load more results")]'),
    (By.XPATH, "//div[@class='bottom_of_basiclayout']/parent::div/div/div/div/button[span]"),
)


def _find_load_more_button(drv):
    """Return the first clickable "load more" button, or False if none is ready.

    Intended to be passed to ``WebDriverWait.until``; a falsy return value
    makes the wait poll again until its timeout expires.
    """
    for locator in LOAD_MORE_LOCATORS:
        try:
            element = EC.element_to_be_clickable(locator)(drv)
        except NoSuchElementException:
            # This locator matches nothing on the page; try the next one.
            continue
        if element:
            return element
    return False


options = Options()
options.add_argument("--headless")
options.add_argument('--no-sandbox')
options.add_argument('start-maximized')
options.add_argument('disable-infobars')
options.add_argument('--disable-extensions')

chrome_driver_path = r'C:\chromedriver\chromedriver-win64\chromedriver.exe'
# NOTE(review): newer Selenium 4 releases no longer accept `executable_path`;
# if this call fails, pass a `Service` object instead -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(executable_path=chrome_driver_path, options=options)

num = 0    # number of times the "load more" button was clicked
rows = []  # one [name, url] pair per property card

try:
    driver.get(URLS[region_index])
    wait = WebDriverWait(driver, 15)

    while True:
        # Scroll to the bottom so the "load more" button gets rendered.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        try:
            button = wait.until(_find_load_more_button)
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", button)
            num += 1
        except TimeoutException:
            print("No more buttons to click or button not found within the specified time.")
            break
        except NoSuchElementException:
            print("Button not found.")
            break

    hotels = driver.find_elements(By.XPATH, '//div[@data-testid="property-card"]')
    for hotel in hotels:
        name = hotel.find_element(By.XPATH, './/div[@data-testid="title"]').text
        link = hotel.find_element(By.XPATH, './/a[@data-testid="title-link"]').get_attribute('href')
        rows.append([name, link])
finally:
    # Always shut the browser down, even if scraping raised, so no
    # Chrome/chromedriver process is left running.
    driver.quit()

# Build the frame once instead of appending row by row.
hotels_data = pd.DataFrame(rows, columns=['name', 'url'])
sum_hotels = len(hotels_data)

hotels_data.to_csv(f'hotels_{region_index}_url.csv')
print(f'Total {sum_hotels} hotels')
print(num)
该代码适用于页面 https://www.booking.com/searchresults.ru.html?label=gen173nr-1FCAYoJ0IMbmlhZ2FyYWZhbGxzSCFYBGipAYgBAZgBIbgBF8gBDNgBAegBAfgBAogCAagCA7gCspjWsAbAAgHSAiQ1Njk3OTQ3OS0zNGRhLTQyNGYtOGVhMy01NzRmZWI5ODk4OGbYAgXgAgE&sid=9293bbd9385ccceffb672c888c1c029d&region=2454& ,但不适用于页面 https://www.booking.com/searchresults.ru.html?label=gen173nr-1FCAYoJ0IMbmlhZ2FyYWZhbGxzSCFYBGipAYgBAZgBIbgBF8gBDNgBAegBAfgBAogCAagCA7gCspjWsAbAAgHSAiQ1Njk3OTQ3OS0zNGRhLTQyNGYtOGVhMy01NzRmZWI5ODk4OGbYAgXgAgE&sid=9293bbd9385ccceffb672c888c1c029d&region=3615& 。在第二个页面上,"加载更多"按钮没有被处理,其他一切正常。该如何解决?
我已经把 XPath 写得更通用了,但不知道还有什么问题,请帮忙。
我找到了一个可用的 XPath,前提是你已经滚动到页面最底部,并且"加载更多"按钮已经在页面上加载出来。
对我来说,该网站默认以俄语加载。
xpath =
//div[@class='bottom_of_basiclayout']/parent::div/div/div/div/button/span
如果有帮助,干杯!