`data-gs` 属性的值不是静态的,它会随着每次请求而变化。因此,我们可以改用更稳健的 XPath 来解决价格识别问题。
下面的代码片段按预期工作。
from selenium import webdriver
from lxml import html
from time import sleep
def main():
    """Print the flight prices found on a Google Flights search results page.

    Opens the search URL in Chrome, pauses for a fixed 5 seconds, parses the
    rendered page source with lxml, then prints the number of matched price
    elements followed by the text of each one.

    The browser session is always shut down, even if loading or parsing
    raises.
    """
    URL = "https://www.google.com/travel/flights/search?tfs=CBwQAhooEgoyMDI0LTA2LTEwagwIAhIIL20vMDFfZDRyDAgDEggvbS8wMTkxNBooEgoyMDI0LTA4LTEzagwIAxIIL20vMDE5MTRyDAgCEggvbS8wMV9kNEABSAFwAYIBCwj___________8BmAEB&tfu=EgYIAhAAGAA&hl=en-US&curr=USD"
    # Select the price spans by class name instead of by the ``data-gs``
    # attribute, whose value changes on every request.
    price_xpath = (
        "//div[contains(@class,'U3gSDe')]/div"
        "/div[contains(@class,'YMlIz FpEdX')]/span"
    )

    driver = webdriver.Chrome()
    try:
        driver.get(URL)
        # Fixed pause before reading page_source; presumably gives the
        # results time to render -- NOTE(review): an explicit wait on the
        # price elements would be more reliable than a hard-coded delay.
        sleep(5)
        tree = html.fromstring(driver.page_source)
        flights = tree.xpath(price_xpath)
        print(len(flights))
        for item in flights:
            print(item.text)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window.
        driver.quit()
# Script entry point: runs the scraper unconditionally when this file is executed.
main()
输出:
9
$1,963
$2,097
$2,385
$2,657
$2,912
$2,947
$3,079
$3,337
$5,374