r"""HTTP/1.1 client library
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |\_____________________________
      |                              | getresponse() raises
      | response = getresponse()     | ConnectionError
      v                              v
    Unread-response                Idle
    [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
This diagram presents the following rules:
-- a second request may not be started until {response-headers-read}
-- a response [object] cannot be retrieved until {request-sent}
-- there is no differentiation between an unread response body and a
partially read response body
Note: this enforcement is applied by the HTTPConnection class. The
HTTPResponse class does not enforce this state machine, which
implies sophisticated clients may accelerate the request/response
pipeline. Caution should be taken, though: accelerating the states
beyond the above pattern may imply knowledge of the server's
connection-close behavior for certain requests. For example, it
is impossible to tell whether the server will close the connection
UNTIL the response headers have been read; this means that further
requests cannot be placed into the pipeline until it is known that
the server will NOT be closing the connection.
Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
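# A minimal sketch of the legal call sequence described above (illustrative
# only, not part of the module; example.com is a placeholder host):
#
#   conn = HTTPConnection("example.com")
#   conn.putrequest("GET", "/")       # Idle -> Request-started
#   conn.putheader("Accept", "*/*")   # allowed while Request-started
#   conn.endheaders()                 # Request-started -> Request-sent
#   resp = conn.getresponse()         # Request-sent -> Unread-response
#   body = resp.read()                # Unread-response -> Idle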
import email.parser
import email.message
import http
import io
import re
from urllib.parse import urlsplit
# HTTPMessage, parse_headers(), and the HTTP status code constants are
# intentionally omitted for simplicity
__all__ = ["HTTPResponse", "HTTPConnection",
"HTTPException", "NotConnected", "UnknownProtocol",
"UnknownTransferEncoding", "UnimplementedFileMode",
"IncompleteRead", "InvalidURL", "ImproperConnectionState",
"CannotSendRequest", "CannotSendHeader", "ResponseNotReady",
"BadStatusLine", "LineTooLong", "RemoteDisconnected", "error",
_UNKNOWN = 'UNKNOWN'

# connection states
_CS_IDLE = 'Idle'
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'
# hack to maintain backwards compatibility
globals().update(http.HTTPStatus.__members__)
# another hack to maintain backwards compatibility
# Mapping status codes to official W3C names
responses = {v: v.phrase for v in http.HTTPStatus.__members__.values()}
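# For example, after the two hacks above both of the following hold
# (HTTPStatus members compare and hash equal to plain ints, so integer
# keys work):
#
#   OK == 200         # CONTINUE, NOT_FOUND, ... are module attributes
#   responses[200]    # -> 'OK'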
# maximal amount of data to read at one time in _safe_read
MAXAMOUNT = 1048576

# maximal line length when calling readline().
_MAXLINE = 65536
_MAXHEADERS = 100
# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
# header-field = field-name ":" OWS field-value OWS
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text
# obs-fold = CRLF 1*( SP / HTAB )
# ; obsolete line folding
# tchar          = "!" / "#" / "$" / "%" / "&" / "'" / "*"
#                / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
#                / DIGIT / ALPHA
#                ; any VCHAR, except delimiters
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
_is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
_is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
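# Illustrative checks (not part of the module) of the two predicates:
#
#   _is_legal_header_name(b"Content-Type")         # match -> legal name
#   _is_legal_header_name(b"Bad:Name")             # None  -> ':' rejected
#   _is_illegal_header_value(b"x\r\nInjected: 1")  # match -> header injection
#   _is_illegal_header_value(b"x\r\n\tfolded")     # None  -> obs-fold allowed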
# These characters are not allowed within HTTP URL paths.
# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740. Includes control characters such as \r\n.
# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
# Arguably only these _should_ be allowed:
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.
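# Illustrative checks (not part of the module): an ordinary path passes,
# while a request target smuggling whitespace or a CRLF is caught:
#
#   _contains_disallowed_url_pchar_re.search("/index.html")       # None -> ok
#   _contains_disallowed_url_pchar_re.search("/ HTTP/1.1\r\nX:1") # match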
# These characters are not allowed within HTTP method names
# to prevent http header injection.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
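# Illustrative checks (not part of the module):
#
#   _contains_disallowed_method_pchar_re.search("GET")          # None -> ok
#   _contains_disallowed_method_pchar_re.search("GET\r\nHost:") # match -> bad
#   "POST" in _METHODS_EXPECTING_BODY                           # True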
def _encode(data, name='data'):
    """Call data.encode("latin-1") but show a better error message."""
    try:
        return data.encode("latin-1")
    except UnicodeEncodeError as err:
        raise UnicodeEncodeError(
            err.encoding,
            err.object,
            err.start,
            err.end,
            "%s (%.20r) is not valid Latin-1. Use %s.encode('utf-8') "
            "if you want to send it encoded in UTF-8." %
            (name.title(), data[err.start:err.end], name)) from None
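# Illustrative behavior (not part of the module): Latin-1 covers only
# U+0000..U+00FF, so e.g. the euro sign fails with the improved message:
#
#   _encode("café")       # ok: 'é' (U+00E9) is valid Latin-1
#   _encode("€", "body")  # raises UnicodeEncodeError: Body ('€') is not
#                         # valid Latin-1. Use body.encode('utf-8') if you
#                         # want to send it encoded in UTF-8.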
class HTTPMessage(email.message.Message):
# XXX The only usage of this method is in
# http.server.CGIHTTPRequestHandler. Maybe move the code there so
# that it doesn't need to be part of the public API. The API has
    # never been defined so this could cause backwards compatibility
    # issues.
def getallmatchingheaders(self, name):
"""Find all header lines matching a given header name.
Look through the list of headers and find all lines matching a given
header name (and their continuation lines). A list of the lines is
returned, without interpretation. If the header does not occur, an
empty list is returned. If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.

        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.keys():
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(line)
        return lst
"""Reads potential header lines into a list from a file pointer.
Length of line is limited by _MAXLINE, and number of
headers is limited by _MAXHEADERS.
line = fp.readline(_MAXLINE + 1)
raise LineTooLong("header line")
if len(headers) > _MAXHEADERS:
raise HTTPException("got more than %d headers" % _MAXHEADERS)
if line in (b'\r\n', b'\n', b''):
def parse_headers(fp, _class=HTTPMessage):
"""Parses only RFC2822 headers from a file pointer.
email Parser wants to see strings rather than bytes.
But a TextIOWrapper around self.rfile would buffer too many bytes
from the stream, bytes which we later need to read as bytes.
    So we read the correct bytes here, as bytes, for email Parser
    to parse.

    """
headers = _read_headers(fp)
hstring = b''.join(headers).decode('iso-8859-1')
return email.parser.Parser(_class=_class).parsestr(hstring)
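# A quick illustration of parse_headers() (not part of the module);
# io.BytesIO stands in for the socket's file object:
#
#   raw = b"Host: example.com\r\nAccept: */*\r\n\r\n"
#   msg = parse_headers(io.BytesIO(raw))
#   msg["Host"]    # -> 'example.com'
#   msg["Accept"]  # -> '*/*'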
class HTTPResponse(io.BufferedIOBase):
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
# The bytes from the socket object are iso-8859-1 strings.
# See RFC 2616 sec 2.2 which notes an exception for MIME-encoded
    # text following RFC 2047.  The basic status line parsing only
    # accepts iso-8859-1.
def __init__(self, sock, debuglevel=0, method=None, url=None):
# If the response includes a content-length header, we need to
# make sure that the client doesn't read more than the
# specified number of bytes. If it does, it will block until
# the server times out and closes the connection. This will
# happen if a self.fp.read() is done (without a size) whether
# self.fp is buffered or not. So, no self.fp.read() by
# clients unless they know what they are doing.
self.fp = sock.makefile("rb")
        self.debuglevel = debuglevel
        self._method = method
# The HTTPResponse object is returned via urllib. The clients
# of http and urllib expect different attributes for the
# headers. headers is used here and supports urllib. msg is
        # provided as a backwards compatibility layer for http
        # clients.
self.headers = self.msg = None
# from the Status-Line of the response
self.version = _UNKNOWN # HTTP-Version
self.status = _UNKNOWN # Status-Code
self.reason = _UNKNOWN # Reason-Phrase
self.chunked = _UNKNOWN # is "chunked" being used?
self.chunk_left = _UNKNOWN # bytes left to read in current chunk
self.length = _UNKNOWN # number of bytes left in response
self.will_close = _UNKNOWN # conn will close at end of response
    def _read_status(self):
        line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
        if len(line) > _MAXLINE:
            raise LineTooLong("status line")
        if self.debuglevel > 0:
            print("reply:", repr(line))
        if not line:
            # Presumably, the server closed the connection before
            # sending a valid response.
            raise RemoteDisconnected("Remote end closed connection without"
                                     " response")
        try:
            version, status, reason = line.split(None, 2)
        except ValueError:
            try:
                version, status = line.split(None, 1)
                reason = ""
            except ValueError:
                # empty version will cause next test to fail.
                version = ""
        if not version.startswith("HTTP/"):
            self._close_conn()
            raise BadStatusLine(line)

        # The status code is a three-digit number
        try:
            status = int(status)
            if status < 100 or status > 999:
                raise BadStatusLine(line)
        except ValueError:
            raise BadStatusLine(line)
        return version, status, reason
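    # For illustration (not part of the class): a well-formed status line
    # splits cleanly, and the reason keeps its line ending until begin()
    # strips it:
    #
    #   "HTTP/1.1 200 OK\r\n".split(None, 2)
    #   # -> ['HTTP/1.1', '200', 'OK\r\n']; status is then int("200") == 200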
    def begin(self):
        if self.headers is not None:
            # we've already started reading the response
            return

        # read until we get a non-100 response
        while True:
            version, status, reason = self._read_status()
            if status != CONTINUE:
                break
            # skip the header from the 100 response
            skipped_headers = _read_headers(self.fp)
            if self.debuglevel > 0:
                print("headers:", skipped_headers)
            del skipped_headers

        self.code = self.status = status
        self.reason = reason.strip()
        if version in ("HTTP/1.0", "HTTP/0.9"):
            # Some servers might still return "0.9", treat it as 1.0 anyway
            self.version = 10
        elif version.startswith("HTTP/1."):
            self.version = 11   # use HTTP/1.1 code for HTTP/1.x where x>=1
        else:
            raise UnknownProtocol(version)

        self.headers = self.msg = parse_headers(self.fp)

        if self.debuglevel > 0:
            for hdr, val in self.headers.items():
                print("header:", hdr + ":", val)
        # are we using the chunked-style of transfer encoding?
        tr_enc = self.headers.get("transfer-encoding")
        if tr_enc and tr_enc.lower() == "chunked":
            self.chunked = True
            self.chunk_left = None
        else:
            self.chunked = False

        # will the connection close at the end of the response?
        self.will_close = self._check_close()

        # do we have a Content-Length?
        # NOTE: RFC 2616, S4.4, #3 says we ignore this if tr_enc is "chunked"
        self.length = None
        length = self.headers.get("content-length")
        if length and not self.chunked:
            try:
                self.length = int(length)
            except ValueError:
                self.length = None
            else:
                if self.length < 0:  # ignore nonsensical negative lengths
                    self.length = None

        # does the body have a fixed length? (of zero)
        if (status == NO_CONTENT or status == NOT_MODIFIED or
            100 <= status < 200 or      # 1xx codes
            self._method == "HEAD"):
            self.length = 0

        # if the connection remains open, and we aren't using chunked, and
        # a content-length was not provided, then assume that the connection
        # WILL close.
        if (not self.will_close and
            not self.chunked and
            self.length is None):
            self.will_close = True
    def _check_close(self):
        conn = self.headers.get("connection")
        if self.version == 11:
            # An HTTP/1.1 proxy is assumed to stay open unless
            # explicitly closed.
            if conn and "close" in conn.lower():
                return True
            return False

        # Some HTTP/1.0 implementations have support for persistent
        # connections, using rules different than HTTP/1.1.

        # For older HTTP, Keep-Alive indicates persistent connection.
        if self.headers.get("keep-alive"):
            return False

        # At least Akamai returns a "Connection: Keep-Alive" header,
        # which was supposed to be sent by the client.
        if conn and "keep-alive" in conn.lower():
            return False

        # Proxy-Connection is a netscape hack.
        pconn = self.headers.get("proxy-connection")
        if pconn and "keep-alive" in pconn.lower():
            return False

        # otherwise, assume it will close
        return True
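    # Worked examples of these rules (illustrative, not part of the class):
    #
    #   version=11, "Connection: close"      -> True  (explicit close)
    #   version=11, no Connection header     -> False (persistent by default)
    #   version=10, "Connection: keep-alive" -> False (Akamai-style keep-alive)
    #   version=10, no keep-alive hints      -> True  (assume close)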
    def _close_conn(self):
        fp = self.fp
        self.fp = None
        fp.close()

    def close(self):
        try:
            super().close() # set "closed" flag
        finally:
            if self.fp:
                self._close_conn()

    # These implementations are for the benefit of io.BufferedReader.

    # XXX This class should probably be revised to act more like
    # the "raw stream" that BufferedReader expects.

    def readable(self):
        """Always returns True"""
        return True

    # End of "raw stream" methods

    def isclosed(self):
        """True if the connection is closed."""
        # NOTE: it is possible that we will not ever call self.close(). This
        #       case occurs when will_close is TRUE, length is None, and we
        #       read up to the last byte, but NOT past it.
        #
        #       IMPLIES: if will_close is FALSE, then self.close() will ALWAYS
        #                be called, meaning self.isclosed() is meaningful.
        return self.fp is None
    def read(self, amt=None):
        if self.fp is None:
            return b""

        if self._method == "HEAD":
            self._close_conn()
            return b""

        if amt is not None:
            # Amount is given, implement using readinto
            b = bytearray(amt)
            n = self.readinto(b)
            return memoryview(b)[:n].tobytes()
        else:
            # Amount is not given (unbounded read) so we must check
            # self.length and self.chunked
            if self.chunked:
                return self._readall_chunked()

            if self.length is None:
                s = self.fp.read()
            else:
                try:
                    s = self._safe_read(self.length)
                except IncompleteRead:
                    self._close_conn()
                    raise
                self.length = 0
            self._close_conn()        # we read everything
            return s
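    # Typical use of read() (an illustrative sketch; example.com is a
    # placeholder host, not part of the module):
    #
    #   conn = HTTPConnection("example.com")
    #   conn.request("GET", "/")
    #   resp = conn.getresponse()
    #   first = resp.read(1024)   # bounded read, delegated to readinto()
    #   rest = resp.read()        # unbounded read; closes the response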
"""Read up to len(b) bytes into bytearray b and return the number
if self._method == "HEAD":