stackoverflow 不提供内容长度的标头?

问题描述 投票:0回答:1

我正在尝试开发一个简单的 C++ 应用程序,把指定域名/路径的页面内容转储到 index.html 文件

我正在使用 wininet header 来完成我的任务

虽然它适用于大多数网站,但无法从 stackoverflow.com 检索所有数据

简单地说,它在还有数据可读的时候就提前结束了

在浏览器的开发者工具(devtools)中,我看到请求 stackoverflow.com 时返回的响应标头如下:

accept-ranges: bytes
cache-control: private
content-encoding: gzip
content-security-policy: upgrade-insecure-requests; frame-ancestors 'self' https://stackexchange.com
content-type: text/html; charset=utf-8
date: Sat, 08 Apr 2023 01:02:17 GMT
feature-policy: microphone 'none'; speaker 'none'
strict-transport-security: max-age=15552000
vary: Accept-Encoding,Fastly-SSL
via: 1.1 varnish
x-cache: MISS
x-cache-hits: 0
x-dns-prefetch-control: off
x-frame-options: SAMEORIGIN
x-request-guid: 4fdc7ef0-020f-4a00-a7c7-2797853f6a27
x-served-by: cache-hel1410022-HEL
x-timer: S1680915738.773094,VS0,VE134

其中没有 Content-Length 标头

代码:

#include <iostream>
#include <windows.h>
#include <wininet.h>
#include <fstream>
#include <tchar.h>
#include <string>

int main(int argc, char **argv)
{
    if (argc == 1)
    {
        std::cout << "ENTER VALID URL ADDRESS" << std::endl;
        return 0;
    }
    int domain_len = strlen(argv[1]) + 1;
    // std::cout << domain_len << std::endl;
    // std::wstring domain;
    wchar_t *domain = new wchar_t[domain_len];
    MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, argv[1], domain_len, domain, domain_len);
    // std::wcout << domain.c_str() << std::endl;
    std::wcout << domain << std::endl;

    LPCWSTR U_AGENT = L"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36";
    HINTERNET i_hanlde_open = InternetOpenW(U_AGENT, INTERNET_OPEN_TYPE_DIRECT, NULL, NULL, 0);
    // std::cout << i_hanlde_open << std::endl;
    DWORD_PTR context = 0;
    HINTERNET i_handle_connect = InternetConnectW(i_hanlde_open, domain, INTERNET_DEFAULT_HTTPS_PORT, NULL, NULL, INTERNET_SERVICE_HTTP, INTERNET_FLAG_PASSIVE, 0);
    // std::cout << i_handle_connect << std::endl;
    HINTERNET i_handle_http = HttpOpenRequestW(i_handle_connect, L"GET", L"/", NULL, NULL, NULL, INTERNET_FLAG_NO_CACHE_WRITE | INTERNET_FLAG_SECURE | INTERNET_FLAG_PRAGMA_NOCACHE, 0);
    // std::cout << i_handle_http << std::endl;

    BOOL i_handle_send = HttpSendRequestW(i_handle_http, NULL, 0, NULL, 0);
    // std::cout << i_handle_send << std::endl;
    // std::cout << GetLastError() << "\n" << std::endl;

    DWORD buffer_len = 200;
    LPVOID buffer = new char[buffer_len];
    LPDWORD lpdwIndex = 0;
    BOOL query_status = HttpQueryInfoW(i_handle_http, HTTP_QUERY_STATUS_CODE | HTTP_QUERY_FLAG_NUMBER, (LPVOID)buffer, &buffer_len, lpdwIndex);
    // std::cout << query_status << std::endl;
    std::cout << "STATUS CODE: [" << *(DWORD *)buffer << "]" << std::endl;
    // std::cout << GetLastError() << "\n" << std::endl;

    buffer_len = 200;
    query_status = HttpQueryInfoW(i_handle_http, HTTP_QUERY_CONTENT_LENGTH | HTTP_QUERY_FLAG_NUMBER, (LPVOID)buffer, &buffer_len, lpdwIndex);
    if (query_status)
    {
        std::cout << "CONTENT LENGTH: [" << *(DWORD *)buffer << "]" << std::endl;
    }

    std::ofstream file("index.html", std::ios::out);
    BOOL read_data_status;
    DWORD bytes_read = 0;
    unsigned long long int total_size = 0;
    buffer_len = query_status ? *(DWORD *)buffer : 1088000;
    delete[](char *) buffer;
    buffer = new char[buffer_len];
    do
    {
        bytes_read = 0;
        read_data_status = InternetReadFile(i_handle_http, buffer, buffer_len, &bytes_read);
        // std::cout << read_data_status << std::endl;
        std::cout << "READ [" << bytes_read << "] BYTES || " << read_data_status << std::endl;
        // std::cout << buffer_len << std::endl;
        // std::cout << GetLastError() << std::endl;
        file << (char *)buffer;
        total_size += bytes_read;
    } while (read_data_status && bytes_read);
    std::cout << "TOTAL SIZE: [" << total_size << "] BYTES" << std::endl;


    delete[](char *) buffer;
    file.close();
    InternetCloseHandle(i_hanlde_open);
    InternetCloseHandle(i_handle_connect);
    InternetCloseHandle(i_handle_http);
    std::cout << "END! :)";
}

使用的库: -lwininet

用法:

./main.exe stackoverflow.com

为什么响应标头中缺少内容长度标头?

我怎样才能检索所有数据?

c++ wininet content-length
1个回答
0
投票

实际上,Content-Length 标头仅对于具有 entity body 的消息是强制性的。

参见,例如:“Content-Length:实体的大小”。

首先在任何带有实体主体的页面上检查您的代码。

© www.soinside.com 2019 - 2024. All rights reserved.