我有一个网站，其 HTML DOM 数据会持续变化。我想每隔一定时间导出一次该页面的 HTML 内容，因为我需要解析这些 HTML 数据。该网站不允许通过 API 获取数据。因此，我需要一个可以作为浏览器插件运行的解决方案，或者任何可以在浏览器控制台中运行的 JavaScript 代码。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
# Seconds to wait between two consecutive snapshots.
TIME_AFTER_TAKE_SNAPSHOT = 5
# Number of snapshots taken before the script stops.
SNAPSHOT_COUNT = 1000
# Location of the chromedriver binary used to drive Chrome.
CHROMEDRIVER_PATH = '/var/lib/chromedriver'
# Page whose DOM is exported on every iteration.
TARGET_URL = "https://google.com/"


def index_class_name(i):
    """Return the class name that is looked up on snapshot number *i*."""
    return "index{}".format(i)


def main():
    """Open TARGET_URL in Chrome and print periodic snapshots of its DOM.

    On each of the SNAPSHOT_COUNT iterations the full page source is printed,
    followed by the text of the first element whose class is ``index<i>``
    (when such an element exists), and then the script sleeps for
    TIME_AFTER_TAKE_SNAPSHOT seconds.  The browser is always closed on exit,
    including when an exception or Ctrl-C interrupts the loop.
    """
    # Imported here so the block is self-contained: current Selenium releases
    # take the driver path through a Service object instead of the removed
    # ``executable_path`` keyword.
    from selenium.webdriver.chrome.service import Service

    options = webdriver.ChromeOptions()
    # Actually request the incognito window the script is meant to use.
    options.add_argument("--incognito")
    browser = webdriver.Chrome(
        service=Service(executable_path=CHROMEDRIVER_PATH),
        options=options,
    )
    try:
        browser.get(TARGET_URL)
        for i in range(SNAPSHOT_COUNT):
            # Complete markup of the page as it is right now.
            print(browser.page_source)
            # find_elements returns an empty list when nothing matches, so
            # guard the index instead of crashing with IndexError.
            matches = browser.find_elements(By.CLASS_NAME, index_class_name(i))
            if matches:
                print(matches[0].text)
            time.sleep(TIME_AFTER_TAKE_SNAPSHOT)
    finally:
        # Without this the Chrome and chromedriver processes outlive the script.
        browser.quit()


if __name__ == "__main__":
    main()