使用线程下载文件/视频时出现问题

问题描述 投票:0回答:1

我尝试使用以下功能进行下载。但下载的文件比原始文件大。

import os
import requests
from tqdm import tqdm
import threading

def download_chunk(url, save_folder, filename, start_byte, end_byte, progress_bar):
    headers = {'Range': f'bytes={start_byte}-{end_byte}'}
    response = requests.get(url, headers=headers, stream=True, allow_redirects=True)

    with open(os.path.join(save_folder, filename), 'ab') as file:
        for data in response.iter_content(chunk_size=4096):
            file.write(data)
            progress_bar.update(len(data))

def download_with_progress(url, save_folder, filename, num_threads=4):
    response = requests.head(url, allow_redirects=True)
    file_size = int(response.headers.get('content-length', 0))

    if response.status_code == 200:
        block_size = file_size // num_threads
        progress_bar = tqdm(total=file_size, unit='B', unit_scale=True)

        with open(os.path.join(save_folder, filename), 'wb') as file:
            file.truncate(file_size)

        threads = []

        for i in range(num_threads):
            start_byte = i * block_size
            end_byte = start_byte + block_size - 1 if i < num_threads - 1 else file_size - 1
            print(f"Thread {i}: Downloading bytes {start_byte}-{end_byte}")
            thread = threading.Thread(target=download_chunk, args=(url, save_folder, filename, start_byte, end_byte, progress_bar))
            threads.append(thread)
            thread.start()

        for thread in threads:
            thread.join()

        progress_bar.close()
        print(f"Downloaded {filename}")
    else:
        print(f"Failed to download file. Status code: {response.status_code}")


# Example usage:
download_with_progress("https://example.com/download.mp4", "GoogleColab", "hellyrev.mp4", num_threads=20)
# Google colab is dir 

我想快速下载文件,因为文件下载太慢。但是,下载的文件比原始文件大。请帮我解决这个问题。

我尝试使用多线程从 url 下载文件以提高整体 dl 速度。但是,下载的文件比原始文件大。

python multithreading download
1个回答
0
投票

我找到并实施的解决方案

from pypdl import Downloader

url = "https://mysite.site/file.pdf"
dest = "/content/GoogleColab/lfab.mp4"

dl = Downloader()
dl.start(url, dest, num_connections=10, display=True, multithread=True)

它将按进度下载文件。 Pysmartdl 也不错,但它(pypdl)对我来说更方便。

© www.soinside.com 2019 - 2024. All rights reserved.