我正在尝试通过单击下载按钮从浏览器的 PDF 查看器下载以下 PDF。
我使用 ID、CSS_SELECTOR 和 XPATH 定位器尝试了以下代码,但似乎都不起作用。可能是什么问题?有人可以帮我解决这个问题吗?
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
# Open the PDF in Chrome and click the viewer's download button.
download_url = "https://www.npci.org.in/PDF/nach/circular/2015-16/Circular-No.135.pdf"
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=options)
try:
    driver.get(download_url)
    # Use ONE locator for both the wait and the click. The previous lookup,
    # `By.ID, '#icon'`, searched for an element whose id is literally "#icon"
    # (the leading '#' is CSS syntax, not part of the id), so it never matched.
    # NOTE(review): Chrome's built-in PDF viewer may render its toolbar outside
    # the page's regular DOM, in which case this wait still times out - confirm.
    download_button = WebDriverWait(driver, 30).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, '#icon'))
    )
    download_button.click()
    # Keep the browser alive briefly so the download can start/finish.
    time.sleep(5)
finally:
    # Always shut the browser down, even when the wait or click fails.
    driver.quit()
启动下载后,您需要让 WebDriver 保持运行,直到下载完成。但是,Selenium 没有内置的方法来等待下载完成。
以下脚本会将 PDF 文件下载到提供的文件夹路径。这是通过打开 Chrome 的下载管理器并监控上次下载的状态来实现的。
状态变为“已完成”后,驱动程序会再等待几秒钟,以确保下载的文件已被重命名。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def download_pdf_to_custom_path(download_url, download_folder):
    """Download the file at ``download_url`` into ``download_folder`` via Chrome.

    Chrome is started with preferences that set the download directory,
    disable the save prompt, and open PDFs externally, then navigated to the
    URL. Afterwards ``chrome://downloads`` is opened and the newest entry's
    ``state`` is polled until it is no longer in progress.

    Args:
        download_url: URL of the PDF to download.
        download_folder: Directory handed to Chrome as
            ``download.default_directory``.

    Returns:
        None. Problems are reported with ``print``; the browser is always
        closed before returning or propagating an exception.
    """
    chrome_options = Options()
    chrome_options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_folder,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "plugins.always_open_pdf_externally": True,
        },
    )
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(download_url)
        # Give some time for the download to begin
        time.sleep(5)
        # Open the Downloads Manager window
        driver.get("chrome://downloads")
        # JavaScript that returns the list of download entries
        download_items_script = "return document.querySelector('downloads-manager').shadowRoot.getElementById('downloadsList').items;"
        items = driver.execute_script(download_items_script)
        # Without any entry there is nothing to poll; indexing items[0] would
        # raise IndexError (or TypeError on None), so report and stop instead.
        if not items:
            print("Something went wrong. No download was started.")
            return
        # Most recent download is the first item in the list.
        # Poll its `state` while it still reports 0 (in progress).
        while items[0]["state"] == 0:
            time.sleep(1)
            items = driver.execute_script(download_items_script)
        # The `state` will change to 2 when the download is complete
        if items[0]["state"] == 2:
            # Give the browser some time to rename the downloaded file
            time.sleep(2)
        else:
            # The download did not complete successfully.
            print(f"Something went wrong. {items[0]['state']}")
    finally:
        # Close the browser on every path so a failure does not leak Chrome.
        driver.quit()
if __name__ == "__main__":
    from pathlib import Path

    download_url = "https://www.npci.org.in/PDF/nach/circular/2015-16/Circular-No.135.pdf"
    # Resolve first so the folder is absolute even when __file__ is a relative
    # path; the value is handed to Chrome as its download directory.
    folder = str(Path(__file__).resolve().parent)
    download_pdf_to_custom_path(download_url, folder)