r"""HTTP/1.1 client library
HTTPConnection goes through a number of "states", which define when a client
may legally make another request or fetch the response for a particular
request. This diagram details these state transitions:
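    (null)
      |
      | HTTPConnection()
      v
    Idle
      |
      | putrequest()
      v
    Request-started
      |
      | ( putheader() )*  endheaders()
      v
    Request-sent
      |
      | response = getresponse()
      v
    Unread-response   [Response-headers-read]
      |\____________________
      |                     |
      | response.read()     | putrequest()
      v                     v
    Idle                  Req-started-unread-response
                     ______/|
                   /        |
   response.read() |        | ( putheader() )*  endheaders()
                   v        v
       Request-started    Req-sent-unread-response
                            |
                            | response.read()
                            v
                          Request-sent
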
This diagram presents the following rules:
-- a second request may not be started until {response-headers-read}
-- a response [object] cannot be retrieved until {request-sent}
-- there is no differentiation between an unread response body and a
partially read response body
Note: this enforcement is applied by the HTTPConnection class. The
HTTPResponse class does not enforce this state machine, which
implies sophisticated clients may accelerate the request/response
pipeline. Caution should be taken, though: accelerating the states
beyond the above pattern may imply knowledge of the server's
connection-close behavior for certain requests. For example, it
is impossible to tell whether the server will close the connection
UNTIL the response headers have been read; this means that further
requests cannot be placed into the pipeline until it is known that
the server will NOT be closing the connection.

Logical State                  __state            __response
-------------                  -------            ----------
Idle                           _CS_IDLE           None
Request-started                _CS_REQ_STARTED    None
Request-sent                   _CS_REQ_SENT       None
Unread-response                _CS_IDLE           <response_class>
Req-started-unread-response    _CS_REQ_STARTED    <response_class>
Req-sent-unread-response       _CS_REQ_SENT       <response_class>
"""
from sys import py3kwarning
from urlparse import urlsplit
with warnings.catch_warnings():
warnings.filterwarnings("ignore", ".*mimetools has been removed",
from cStringIO import StringIO
from StringIO import StringIO
# Public API of this module: the names exported by ``from <module> import *``.
__all__ = [
    "HTTP",
    "HTTPResponse",
    "HTTPConnection",
    "HTTPException",
    "NotConnected",
    "UnknownProtocol",
    "UnknownTransferEncoding",
    "UnimplementedFileMode",
    "IncompleteRead",
    "InvalidURL",
    "ImproperConnectionState",
    "CannotSendRequest",
    "CannotSendHeader",
    "ResponseNotReady",
    "BadStatusLine",
    "error",
    "responses",
]
# Connection states: the values stored in the connection's __state (see the
# "Logical State" table in the module docstring).
_CS_REQ_STARTED = 'Request-started'
_CS_REQ_SENT = 'Request-sent'

# HTTP status codes, grouped by class.
# informational (1xx)
SWITCHING_PROTOCOLS = 101
# successful (2xx)
NON_AUTHORITATIVE_INFORMATION = 203
# client error (4xx)
PROXY_AUTHENTICATION_REQUIRED = 407
PRECONDITION_FAILED = 412
REQUEST_ENTITY_TOO_LARGE = 413
REQUEST_URI_TOO_LONG = 414
UNSUPPORTED_MEDIA_TYPE = 415
REQUESTED_RANGE_NOT_SATISFIABLE = 416
UNPROCESSABLE_ENTITY = 422
# server error (5xx)
INTERNAL_SERVER_ERROR = 500
SERVICE_UNAVAILABLE = 503
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
# Mapping status codes to official W3C names.
#
# Keys are integer status codes, values are the standard reason phrases.
# Not every status constant defined in this module has an entry here, so
# look codes up with ``responses.get(code)`` when the code may be absent.
responses = {
    # informational (1xx)
    101: 'Switching Protocols',

    # successful (2xx)
    203: 'Non-Authoritative Information',

    # redirection (3xx)
    301: 'Moved Permanently',
    307: 'Temporary Redirect',

    # client error (4xx)
    405: 'Method Not Allowed',
    407: 'Proxy Authentication Required',
    412: 'Precondition Failed',
    413: 'Request Entity Too Large',
    414: 'Request-URI Too Long',
    415: 'Unsupported Media Type',
    416: 'Requested Range Not Satisfiable',
    417: 'Expectation Failed',

    # server error (5xx)
    500: 'Internal Server Error',
    503: 'Service Unavailable',
    505: 'HTTP Version Not Supported',
}
# maximal amount of data to read at one time in _safe_read
# maximal line length when calling readline().
# maximum amount of headers accepted
# Header name/value ABNF (http://tools.ietf.org/html/rfc7230#section-3.2)
# header-field = field-name ":" OWS field-value OWS
# field-value = *( field-content / obs-fold )
# field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
# field-vchar = VCHAR / obs-text
# obs-fold = CRLF 1*( SP / HTAB )
# ; obsolete line folding
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
# / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
# ; any VCHAR, except delimiters
# VCHAR defined in http://tools.ietf.org/html/rfc5234#appendix-B.1
# the patterns for both name and value are more lenient than RFC
# definitions to allow for backwards compatibility
# Bound ``match`` of a pattern that accepts a header name whose first
# character is neither ':' nor whitespace and whose remaining characters
# contain no ':', CR or LF.  Returns a match object, or None if the name is
# not acceptable.
_is_legal_header_name = re.compile(r'\A[^:\s][^:\r\n]*\Z').match
# Bound ``search`` of a pattern that finds a line break which is NOT a
# folded continuation: an LF not followed by SP/HTAB, or a CR not followed
# by SP/HTAB/LF.  A non-None result means the value is illegal.
_is_illegal_header_value = re.compile(r'\n(?![ \t])|\r(?![ \t\n])').search
# These characters are not allowed within HTTP URL paths.
# See https://tools.ietf.org/html/rfc3986#section-3.3 and the
# https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
# Prevents CVE-2019-9740. Includes control characters such as \r\n.
# The class covers \x00-\x20 (control characters and space), \x7f (DEL)
# and the non-ASCII range 0x80-0xff.
_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f-\xff]')
# Arguably only these _should_ be allowed:
# _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
# We are more lenient for assumed real world compatibility purposes.
# These characters are not allowed within HTTP method names
# to prevent http header injection.
# The class is the ASCII control range \x00-\x1f, which includes CR and LF.
_contains_disallowed_method_pchar_re = re.compile('[\x00-\x1f]')
# We always set the Content-Length header for these methods because some
# servers will otherwise respond with a 411 (Length Required).
# Membership is tested against the literal method strings below.
_METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
class HTTPMessage(mimetools.Message):
def addheader(self, key, value):
    """Add header for field key handling repeats.

    The first value seen for *key* is stored in ``self.dict`` as-is.  Each
    later value for the same key is appended to the stored one, separated
    by ", ", so repeated header fields collapse into a single
    comma-separated value.
    """
    prev = self.dict.get(key)
    if prev is None:
        # First occurrence: nothing to join with yet.  Joining against
        # None would raise TypeError.
        self.dict[key] = value
    else:
        self.dict[key] = ", ".join((prev, value))
def addcontinue(self, key, more):
    """Add more field data from a continuation line.

    Appends *more* to the value already stored for *key* in ``self.dict``,
    separated by a newline and one space.  Raises KeyError if *key* has no
    stored value yet.
    """
    # The existing value must be fetched first; without this the
    # concatenation below refers to an undefined name.
    prev = self.dict[key]
    self.dict[key] = prev + "\n " + more
Read header lines up to the entirely blank line that terminates them.
The (normally blank) line that ends the headers is skipped, but not
included in the returned list. If an invalid line is found in the
header section, it is skipped, and further lines are processed.
The variable self.status is set to the empty string if all went well,
otherwise it is an error message. The variable self.headers is a
completely uninterpreted list of lines contained in the header (so
printing them will reproduce the header exactly as it appears in the
If multiple header fields with the same name occur, they are combined
according to the rules in RFC 2616 sec 4.2:
Appending each subsequent field-value to the first, each separated
by a comma. The order in which header fields with the same field-name
are received is significant to the interpretation of the combined
# XXX The implementation overrides the readheaders() method of
# rfc822.Message. The base class design isn't amenable to
# customized behavior here so the method here is a copy of the
# base class code with a few small changes.
self.headers = hlist = []
if not hasattr(self.fp, 'unread') and self.seekable:
if len(hlist) > _MAXHEADERS:
raise HTTPException("got more than %d headers" % _MAXHEADERS)
line = self.fp.readline(_MAXLINE + 1)
raise LineTooLong("header line")
self.status = 'EOF in headers'
# Skip unix From name time lines
if firstline and line.startswith('From '):
self.unixfrom = self.unixfrom + line
if headerseen and line[0] in ' \t':
# XXX Not sure if continuation lines are handled properly
# for http and/or for repeating headers
# It's a continuation line.
self.addcontinue(headerseen, line.strip())
elif self.iscomment(line):
# It's a comment. Ignore it.
# Note! No pushback here! The delimiter line gets eaten.
headerseen = self.isheader(line)
# It's a legal header line, save it.
self.addheader(headerseen, line[len(headerseen)+1:].strip())
elif headerseen is not None:
# An empty header name. These aren't allowed in HTTP, but it's
# probably a benign mistake. Don't add the header, just keep
# It's not a header line; skip it and try the next line.
self.status = 'Non-header line where header expected'
"""Reads potential header lines into a list from a file pointer.
Length of line is limited by _MAXLINE, and number of
headers is limited by _MAXHEADERS.
line = fp.readline(_MAXLINE + 1)
raise LineTooLong("header line")
if len(headers) > _MAXHEADERS:
raise HTTPException("got more than %d headers" % _MAXHEADERS)
if line in (b'\r\n', b'\n', b''):
# strict: If true, raise BadStatusLine if the status line can't be
# parsed as a valid HTTP/1.0 or 1.1 status line. By default it is
# false because it prevents clients from talking to HTTP/0.9
# servers. Note that a response with a sufficiently corrupted
# status line will look like an HTTP/0.9 response.
# See RFC 2616 sec 19.6 and RFC 1945 sec 6 for details.
def __init__(self, sock, debuglevel=0, strict=0, method=None, buffering=False):
    """Wrap *sock* in a file object and reset all response state.

    sock       -- connected socket; only its makefile() method is used here.
    debuglevel -- stored on the instance as self.debuglevel.
    strict     -- stored as self.strict (see the comment above this method
                  for its intended meaning).
    method     -- request method, stored as self._method.
    buffering  -- if true, read through a buffered file object; otherwise
                  use an unbuffered one.
    """
    if buffering:
        # The caller won't be using any sock.recv() calls, so buffering
        # is fine and recommended for performance.
        self.fp = sock.makefile('rb')
    else:
        # The buffer size is specified as zero, because the headers of
        # the response are read with readline(). If the reads were
        # buffered the readline() calls could consume some of the
        # response, which may be read via a recv() on the underlying
        # socket.
        self.fp = sock.makefile('rb', 0)
    self.debuglevel = debuglevel
    # Keep the remaining constructor arguments instead of discarding them.
    # NOTE(review): presumably other methods of this class consult these
    # attributes -- confirm against the rest of the file.
    self.strict = strict
    self._method = method

    # No headers parsed yet.
    self.msg = None

    # from the Status-Line of the response
    self.version = _UNKNOWN # HTTP-Version
    self.status = _UNKNOWN # Status-Code
    self.reason = _UNKNOWN # Reason-Phrase

    self.chunked = _UNKNOWN # is "chunked" being used?
    self.chunk_left = _UNKNOWN # bytes left to read in current chunk
    self.length = _UNKNOWN # number of bytes left in response
    self.will_close = _UNKNOWN # conn will close at end of response
# Initialize with Simple-Response defaults
line = self.fp.readline(_MAXLINE + 1)
raise LineTooLong("header line")
print "reply:", repr(line)
# Presumably, the server closed the connection before
# sending a valid response.
raise BadStatusLine("No status line received - the server has closed the connection")
[version, status, reason] = line.split(None, 2)
[version, status] = line.split(None, 1)
# empty version will cause next test to fail and status
# will be treated as 0.9 response.
if not version.startswith('HTTP/'):
raise BadStatusLine(line)
# assume it's a Simple-Response from an 0.9 server
self.fp = LineAndFileWrapper(line, self.fp)
return "HTTP/0.9", 200, ""
# The status code is a three-digit number
if status < 100 or status > 999:
raise BadStatusLine(line)
raise BadStatusLine(line)
return version, status, reason
# we've already started reading the response
# read until we get a non-100 response
version, status, reason = self._read_status()
# skip the header from the 100 response
skipped_headers = _read_headers(self.fp)
print("headers:", skipped_headers)
self.reason = reason.strip()
if version == 'HTTP/1.0':
elif version.startswith('HTTP/1.'):
self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
elif version == 'HTTP/0.9':
raise UnknownProtocol(version)
self.msg = HTTPMessage(StringIO())
self.msg = HTTPMessage(self.fp, 0)
for hdr in self.msg.headers: