如何用curl同时下载多个文件?

问题描述 投票:0回答:1

我正在使用我的私人令牌从我的 GitHub 存储库下载多个文件。目前代码已经可以正常工作,但只能一个接一个地依次下载。

我想问如何在不使用线程的情况下并行下载多个文件。 我可以使用任何curl版本。

#include <curl/curl.h>

/// libcurl write callback: appends the received chunk to the std::string that
/// `userp` points to, then wipes the chunk in libcurl's buffer.
///
/// @param contents Start of the received bytes (not null-terminated).
/// @param size     Size of one element.
/// @param nmemb    Number of elements; the chunk is `size * nmemb` bytes long.
/// @param userp    Pointer to the destination std::string.
/// @return Number of bytes consumed, always the full chunk length.
static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t chunkBytes = size * nmemb;
    std::string* const sink = static_cast<std::string*>(userp);

    sink->append(static_cast<const char*>(contents), chunkBytes);

    // Scrub the source buffer once its contents have been copied out.
    SecureZeroMemory(contents, chunkBytes);

    return chunkBytes;
}

void curl(std::string& data, const std::string_view& url)
{
    CURL* curl_handle;
    CURLcode res;
    struct curl_slist* slist{};

    curl_handle = curl_easy_init();

    curl_easy_setopt(curl_handle, CURLOPT_URL, url.data());
    //curl_easy_setopt(curl_handle, CURLOPT_TCP_KEEPALIVE, 0);

    slist = curl_slist_append(slist, gitToken.data());
    slist = curl_slist_append(slist, "Accept: application/vnd.github.v3.raw");
    curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, slist);

    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "curl/7.55.1");

    // Necessary for downloading file from github repo.
    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
   
    curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L); // only to debug

    res = curl_easy_perform(curl_handle);
    OutputDebugStringA(curl_easy_strerror(res));

    if(res != CURLE_OK)
       std::cerr << "curl_easy_perform() failed: " << curl_easy_strerror(res) << '\n';

    curl_easy_cleanup(curl_handle);
    curl_global_cleanup();
}

int main()
{
    std::vector<std::string> urlList =
    {
        "...",
        "...",
        // ...
    };

    for (int i = 0; i < urlList.size(); i++)
    {
        std::string data;
        curl(data, urlList[i]);
        // save data to disk ...
    }
}
c++ github curl libcurl
1个回答
0
投票

curl_multi API 由 libcurl 提供,支持并行传输,无需线程。

#include <curl/curl.h>

#include <cstring>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

/// libcurl write callback: appends the received chunk to the std::string that
/// `userp` points to, then zero-fills the chunk in the source buffer.
///
/// @param contents Start of the received bytes (not null-terminated).
/// @param size     Size of one element.
/// @param nmemb    Number of elements; the chunk is `size * nmemb` bytes long.
/// @param userp    Pointer to the destination std::string.
/// @return Number of bytes consumed, always the full chunk length.
static size_t WriteMemoryCallback(void* contents, size_t size, size_t nmemb, void* userp)
{
    const size_t chunkBytes = size * nmemb;
    std::string* const sink = static_cast<std::string*>(userp);

    sink->append(static_cast<const char*>(contents), chunkBytes);

    // Portable stand-in for SecureZeroMemory: wipe the bytes just copied out.
    memset(contents, 0, chunkBytes);

    return chunkBytes;
}

/// @brief Configures an easy handle to download one resource into a string.
///
/// Sets the URL, two request headers (the caller-supplied header line and
/// "Accept: application/vnd.github.v3.raw"), the write callback and its
/// target, the user agent, redirect following and verbose output.
///
/// @param curl_handle Easy handle to configure; a null handle is ignored.
/// @param data        Receives the response body. The handle stores this
///                    object's address, so it must stay alive and must not be
///                    moved until the transfer has finished.
/// @param url         URL to fetch.
/// @param gitToken    Header line added to the request verbatim.
///                    NOTE(review): presumably a full "Authorization: ..."
///                    line - confirm with the caller.
///
/// NOTE(review): the header list built here is handed to the handle and is not
/// freed by this function, because it has to remain valid while the handle
/// uses it. As written nobody releases it with curl_slist_free_all().
void setup_curl(CURL* curl_handle, std::string& data, const std::string_view& url, const std::string_view& gitToken)
{
    if (!curl_handle)
        return;

    // std::string_view::data() is not guaranteed to be null-terminated, while
    // libcurl expects C strings. libcurl copies the URL and every header
    // string, so these temporaries may be destroyed when the function returns.
    const std::string url_z(url);
    const std::string token_z(gitToken);

    curl_easy_setopt(curl_handle, CURLOPT_URL, url_z.c_str());

    struct curl_slist* headers = curl_slist_append(nullptr, token_z.c_str());
    // curl_slist_append() returns NULL on failure and leaves the existing list
    // untouched, so only replace the list pointer on success.
    if (struct curl_slist* extended = curl_slist_append(headers, "Accept: application/vnd.github.v3.raw"))
        headers = extended;
    curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers);

    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, &data);
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "curl/7.55.1");

    curl_easy_setopt(curl_handle, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(curl_handle, CURLOPT_VERBOSE, 1L); // only for debug
}

int main()
{
    std::vector<std::string> urlList =
    {
        "...",
        "...",
        // ...
    };

    CURLM* multi_handle;
    int still_running; /* keep number of running handles */

    curl_global_init(CURL_GLOBAL_DEFAULT);
    multi_handle = curl_multi_init();

    // Setup curl handles
    std::vector<CURL*> curl_handles;
    std::vector<std::string> dataVec(urlList.size());

    for (size_t i = 0; i < urlList.size(); i++)
    {
        CURL* curl_handle = curl_easy_init();
        setup_curl(curl_handle, dataVec[i], urlList[i], "YOUR_GITHUB_TOKEN_HERE");
        curl_handles.push_back(curl_handle);
        curl_multi_add_handle(multi_handle, curl_handle);
    }

    // Perform the requests in parallel
    curl_multi_perform(multi_handle, &still_running);

    while (still_running) 
    {
        struct timeval timeout;
        int rc;
        CURLMcode mc;

        fd_set fdread;
        fd_set fdwrite;
        fd_set fdexcep;
        int maxfd;

        timeout.tv_sec = 1;
        timeout.tv_usec = 0;

        FD_ZERO(&fdread);
        FD_ZERO(&fdwrite);
        FD_ZERO(&fdexcep);

        curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
        rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);

        switch (rc) 
        {
            case -1:
                break;
            case 0:
            default:
                curl_multi_perform(multi_handle, &still_running);
                break;
        }
    }

    // Cleanup
    for (CURL* handle : curl_handles) 
    {
        curl_multi_remove_handle(multi_handle, handle);
        curl_easy_cleanup(handle);
    }
    curl_multi_cleanup(multi_handle);
    curl_global_cleanup();

    // Now dataVec contains downloaded data for each URL. You can save it to disk or use as required.
}
© www.soinside.com 2019 - 2024. All rights reserved.