import time
from selenium import webdriver
def main():
    """Save the Yahoo Finance TSLA quote page as PDF number 0."""
    download("https://finance.yahoo.com/quote/TSLA/", 0)
def download(url, n):
    """Print the page at ``url`` to ``Downloads/Download {n}.pdf`` with headless Firefox.

    The ``Downloads`` directory is taken relative to the current working
    directory and is created if it does not exist.

    Args:
        url: Address of the page to render.
        n: Index used in the output file name and in the progress message.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from pathlib import Path

    # Path to the geckodriver executable (machine-specific).
    driver_path = "D:/PROGRAMMING/Website-to-PDF/FirefoxDriver/geckodriver.exe"

    # NOTE(review): the relative "./Downloads/..." value handed to
    # print_to_filename appears to be why no file was produced; an absolute,
    # OS-native path is used instead -- confirm on the target Firefox version.
    download_dir = Path.cwd() / "Downloads"
    download_dir.mkdir(parents=True, exist_ok=True)
    output_pdf = download_dir / f"Download {n}.pdf"

    firefox_options = webdriver.FirefoxOptions()
    firefox_options.add_argument('--headless')
    # NOTE(review): the next three look like Chromium switches; confirm that
    # Firefox honours them before relying on their effect.
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Start printing the page without needing to press "print"
    firefox_options.set_preference("print.always_print_silent", True)
    firefox_options.set_preference("print.show_print_progress", False)
    firefox_options.set_preference('print.save_as_pdf.links.enabled', True)

    # Get rid of the headers
    firefox_options.set_preference("print.print_headerleft", "")
    firefox_options.set_preference("print.print_headerright", "")
    firefox_options.set_preference("print.print_footerleft", "")
    firefox_options.set_preference("print.print_footerright", "")

    # Route the print job to the PDF printer and tell it where to write.
    firefox_options.set_preference("print_printer", "Mozilla Save to PDF")
    firefox_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
    firefox_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename', str(output_pdf))

    gecko_service = webdriver.FirefoxService(executable_path=driver_path, port=4444)
    driver = webdriver.Firefox(options=firefox_options, service=gecko_service)
    try:
        print(f"Downloading File {n}")
        driver.get(url)
        driver.implicitly_wait(10)
        driver.execute_script('window.print();')
        # Fixed pause before the browser is closed; presumably this gives the
        # silent print job time to finish writing the file.
        time.sleep(15)
    finally:
        # Always shut the browser down, even if navigation or printing failed.
        driver.quit()
    print("Finished Download.\n")
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
我的计算机上的任何位置都没有下载文件。我也根本无法选择保存文件的位置。
import time
from selenium import webdriver
def main():
    """Save the Yahoo Finance TSLA quote page as PDF number 0."""
    download("https://finance.yahoo.com/quote/TSLA/", 0)
def download(url, n):
    """Open ``url`` in headless Firefox and trigger ``window.print()``.

    In this variant the "Mozilla Save to PDF" printer preferences are left
    disabled, so no output file name is passed to Firefox.

    Args:
        url: Address of the page to render.
        n: Index used in the progress message (and in the disabled file name).
    """
    # Path to the geckodriver executable (machine-specific).
    driver_path = "D:/PROGRAMMING/Website-to-PDF/FirefoxDriver/geckodriver.exe"
    # Only referenced by the disabled printer preferences further down.
    download_path = "./Downloads"
    output_pdf = f"Download {n}.pdf"

    firefox_options = webdriver.FirefoxOptions()
    firefox_options.add_argument('--headless')
    firefox_options.add_argument("--disable-infobars")
    firefox_options.add_argument("--disable-extensions")
    firefox_options.add_argument("--disable-popup-blocking")

    # Start printing the page without needing to press "print"
    firefox_options.set_preference("print.always_print_silent", True)
    firefox_options.set_preference("print.show_print_progress", False)
    firefox_options.set_preference('print.save_as_pdf.links.enabled', True)

    # Get rid of the headers
    firefox_options.set_preference("print.print_headerleft", "")
    firefox_options.set_preference("print.print_headerright", "")
    firefox_options.set_preference("print.print_footerleft", "")
    firefox_options.set_preference("print.print_footerright", "")

    # Using print_printer -- deliberately disabled in this variant.
    # firefox_options.set_preference("print_printer", "Mozilla Save to PDF")
    # firefox_options.set_preference("print.printer_Mozilla_Save_to_PDF.print_to_file", True)
    # firefox_options.set_preference('print.printer_Mozilla_Save_to_PDF.print_to_filename', f"{download_path}/{output_pdf}")

    gecko_service = webdriver.FirefoxService(executable_path=driver_path, port=4444)
    driver = webdriver.Firefox(options=firefox_options, service=gecko_service)
    try:
        print(f"Downloading File {n}")
        driver.get(url)
        driver.implicitly_wait(10)
        driver.execute_script('window.print();')
        # Fixed pause before the browser is closed.
        time.sleep(15)
    finally:
        # Always shut the browser down, even if navigation or printing failed.
        driver.quit()
    print("Finished Download.\n")
# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()
正如您在这里看到的,注释掉 print_printer 后,我的代码可以工作,但它会提示我要保存文件的位置以及文件名应该是什么。
我希望它自动保存到我的 `download_path` 中,文件名为 `output_pdf`。
我已尝试将 `/` 更改为 `\` 和 `\\`,但仍然不起作用。
我尝试按照这个帖子中的类似方法来实现:我添加了自己的 `user_agent` 并设置了它的首选项,但 `profile_options = FirefoxProfile()` 不起作用,因为 `webdriver.Firefox(options=profile_options)` 行不通——`options` 参数需要是 `options.Options` 的实例。
我还尝试了其他库,例如 `pyautogui`,它可以工作,但结果不一致;我也尝试了 `pdfkit` 和 `wkhtmltox`,但它们报错,我也找不到解决方案。
这是设置自定义下载文件夹的一种方法,直接下载PDF文件,无需提示:
[..]
import time as t
[..]
# Headless Firefox configured to print a page straight to a PDF file,
# without showing a print or save dialog.
firefox_options = Firefox_Options()

for cli_flag in (
    '--headless',
    "--disable-infobars",
    "--disable-extensions",
    "--disable-popup-blocking",
):
    firefox_options.add_argument(cli_flag)

# Preferences are applied one by one, in the order listed.
for pref_name, pref_value in (
    # Silent printing, no progress dialog, keep links in the PDF.
    ("print.always_print_silent", True),
    ("print.show_print_progress", False),
    ('print.save_as_pdf.links.enabled', True),
    # Blank page headers and footers.
    ("print.print_headerleft", ""),
    ("print.print_headerright", ""),
    ("print.print_footerleft", ""),
    ("print.print_footerright", ""),
    # Custom download folder.
    ("browser.download.folderList", 2),
    ("browser.download.dir", "/home/user/Desktop"),
    ("browser.download.useDownloadDir", True),
    ("pdfjs.disabled", True),
    # PDF printer and its output file.
    ("print_printer", "Mozilla Save to PDF"),
    ("print.printer_Mozilla_Save_to_PDF.print_to_file", True),
    ('print.printer_Mozilla_Save_to_PDF.print_to_filename', "/home/user/Desktop/output.pdf"),
    ("browser.helperApps.neverAsk.saveToDisk", "application/pdf"),
):
    firefox_options.set_preference(pref_name, pref_value)

driver = webdriver.Firefox(options=firefox_options)
driver.get('https://finance.yahoo.com/quote/TSLA/')
t.sleep(5)  # fixed pause after navigation, before printing
driver.execute_script('window.print();')
t.sleep(5)  # fixed pause after printing, before the browser is closed
driver.quit()
上面的代码会直接将文件下载到桌面,文件名为“output.pdf”。当然,如果您愿意,也可以动态生成路径和文件名。