在python的HTTP响应中接收正文内容

问题描述 投票:-2回答:1

我试图从正文中获取内容,但是当我需要sock.recv时,我总是返回0字节。我已经有了标题,并且工作正常,但是我逐字节接收了它。我现在的问题是:我的内容长度是标题的长度,也是标题的长度。现在我要分开拿身体任务3d

PS:我知道它不能像截图中那样工作,但是我还没有找到其他解决方案

  # -*- coding: utf-8 -*-
"""
task3.simple_web_browser
XX-YYY-ZZZ
<Your name>
"""

from socket import gethostbyname, socket, timeout, AF_INET, SOCK_STREAM
from sys import argv


HTTP_HEADER_DELIMITER = b'\r\n\r\n'
CONTENT_LENGTH_FIELD = b'Content-Length:'
HTTP_PORT = 80
ONE_BYTE_LENGTH = 1

def create_http_request(host, path, method='GET'):
    '''
    Create a sequence of bytes representing an HTTP/1.1 request of the given method.

    :param host: the string contains the hostname of the remote server
    :param path: the string contains the path to the document to retrieve
    :param method: the string contains the HTTP request method (e.g., 'GET', 'HEAD', etc...)
    :return: a bytes object contains the HTTP request to send to the remote server

    e.g.,) An HTTP/1.1 GET request to http://compass.unisg.ch/
    host: compass.unisg.ch
    path: /
    return: b'GET / HTTP/1.1\nHost: compass.unisg.ch\r\n\r\n'
    '''
    ###   Task 3(a)   ###

    # Hint 1: see RFC7230-7231 for the HTTP/1.1 syntax and semantics specification
    # https://tools.ietf.org/html/rfc7230
    # https://tools.ietf.org/html/rfc7231
    # Hint 2: use str.encode() to create an encoded version of the string as a bytes object
    # https://docs.python.org/3/library/stdtypes.html#str.encode
    r =  '{} {} HTTP/1.1\nHost: {}\r\n\r\n'.format(method, path, host)
    response = r.encode()

    return response


    ### Task 3(a) END ###


def get_content_length(header):
    '''
    Get the integer value from the Content-Length HTTP header field if it
    is found in the given sequence of bytes. Otherwise returns 0.

    :param header: the bytes object contains the HTTP header
    :return: an integer value of the Content-Length, 0 if not found
    '''
    ###   Task 3(c)   ###

    # Hint: use CONTENT_LENGTH_FIELD to find the value
    # Note that the Content-Length field may not be always at the end of the header.
    for line in header.split(b'\r\n'):
        if CONTENT_LENGTH_FIELD in line:
            return int(line[len(CONTENT_LENGTH_FIELD):])
    return 0


    ### Task 3(c) END ###


def receive_body(sock, content_length):
    '''
    Receive the body content in the HTTP response

    :param sock: the TCP socket connected to the remote server
    :param content_length: the size of the content to recieve
    :return: a bytes object contains the remaining content (body) in the HTTP response
    '''
    ###   Task 3(d)   ###
    body = bytes()
    data = bytes()


    while True:
        data = sock.recv(content_length)
        if len(data)<=0:
            break
        else:
            body += data

    return body 


    ### Task 3(d) END ###


def receive_http_response_header(sock):
    '''
    Receive the HTTP response header from the TCP socket.

    :param sock: the TCP socket connected to the remote server
    :return: a bytes object that is the HTTP response header received
    '''
    ###   Task 3(b)   ###

    # Hint 1: use HTTP_HEADER_DELIMITER to determine the end of the HTTP header
    # Hint 2: use sock.recv(ONE_BYTE_LENGTH) to receive the chunk byte-by-byte

    header = bytes() 
    chunk = bytes()

    try:
        while HTTP_HEADER_DELIMITER not in chunk:
            chunk = sock.recv(ONE_BYTE_LENGTH)
            if not chunk:
                break
            else:
                header += chunk
    except socket.timeout:
        pass

    return header  

    ### Task 3(b) END ###


def main():
    # Change the host and path below to test other web sites!
    host = 'example.com'
    path = '/index.html'
    print(f"# Retrieve data from http://{host}{path}")

    # Get the IP address of the host
    ip_address = gethostbyname(host)
    print(f"> Remote server {host} resolved as {ip_address}")

    # Establish the TCP connection to the host
    sock = socket(AF_INET, SOCK_STREAM)
    sock.connect((ip_address, HTTP_PORT))
    print(f"> TCP Connection to {ip_address}:{HTTP_PORT} established")

 # Uncomment this comment block after Task 3(a)
    # Send an HTTP GET request
    http_get_request = create_http_request(host, path)
    print('\n# HTTP GET request ({} bytes)'.format(len(http_get_request)))
    print(http_get_request)
    sock.sendall(http_get_request)
 # Comment block for Task 3(a) END

 # Uncomment this comment block after Task 3(b)
    # Receive the HTTP response header
    header = receive_http_response_header(sock)
    print(type(header))
    print('\n# HTTP Response Header ({} bytes)'.format(len(header)))
    print(header)
 # Comment block for Task 3(b) END

#  Uncomment this comment block after Task 3(c)
    content_length = get_content_length(header)
    print('\n# Content-Length')
    print(f"{content_length} bytes")
 # Comment block for Task 3(c) END

 # Uncomment this comment block after Task 3(d)
    body = receive_body(sock, content_length)
    print('\n# Body ({} bytes)'.format(len(body)))
    print(body)
 # Comment block for Task 3(d) END

if __name__ == '__main__':
    main()
python sockets http recv
1个回答
0
投票

我的内容长度是标题的长度,还有标题的长度

你不知道。在receive_http_response_header中,您始终仅再次检查HTTP_HEADER_DELIMITER最新字节(chunk而不是header),这意味着您将永远不会匹配标头的末尾:

    while HTTP_HEADER_DELIMITER not in chunk:
        chunk = sock.recv(ONE_BYTE_LENGTH)
        if not chunk:
            break
        else:
            header += chunk

然后,您假设您已阅读完整的标题,而实际上您已阅读完整的响应。这意味着您尝试读取响应正文时正在执行的另一个recv仅会返回0,因为那里没有更多数据,即该正文已经包含在您认为的HTTP标头中。

© www.soinside.com 2019 - 2024. All rights reserved.