我尝试使用以下功能进行下载。但下载的文件比原始文件大。
import os
import requests
from tqdm import tqdm
import threading
def download_chunk(url, save_folder, filename, start_byte, end_byte, progress_bar):
headers = {'Range': f'bytes={start_byte}-{end_byte}'}
response = requests.get(url, headers=headers, stream=True, allow_redirects=True)
with open(os.path.join(save_folder, filename), 'ab') as file:
for data in response.iter_content(chunk_size=4096):
file.write(data)
progress_bar.update(len(data))
def download_with_progress(url, save_folder, filename, num_threads=4):
response = requests.head(url, allow_redirects=True)
file_size = int(response.headers.get('content-length', 0))
if response.status_code == 200:
block_size = file_size // num_threads
progress_bar = tqdm(total=file_size, unit='B', unit_scale=True)
with open(os.path.join(save_folder, filename), 'wb') as file:
file.truncate(file_size)
threads = []
for i in range(num_threads):
start_byte = i * block_size
end_byte = start_byte + block_size - 1 if i < num_threads - 1 else file_size - 1
print(f"Thread {i}: Downloading bytes {start_byte}-{end_byte}")
thread = threading.Thread(target=download_chunk, args=(url, save_folder, filename, start_byte, end_byte, progress_bar))
threads.append(thread)
thread.start()
for thread in threads:
thread.join()
progress_bar.close()
print(f"Downloaded {filename}")
else:
print(f"Failed to download file. Status code: {response.status_code}")
# Example usage:
download_with_progress("https://example.com/download.mp4", "GoogleColab", "hellyrev.mp4", num_threads=20)
# Google colab is dir
我想快速下载文件,因为文件下载太慢。但是,下载的文件比原始文件大。请帮我解决这个问题。
我尝试使用多线程从 url 下载文件以提高整体 dl 速度。但是,下载的文件比原始文件大。
from pypdl import Downloader
url = "https://mysite.site/file.pdf"
dest = "/content/GoogleColab/lfab.mp4"
dl = Downloader()
dl.start(url, dest, num_connections=10, display=True, multithread=True)
它将按进度下载文件。 Pysmartdl 也不错,但它(pypdl)对我来说更方便。