val = val.strip() if sep else None
# This is an RFC 2109 cookie.
# convert expires date to seconds since epoch
val = http2time(strip_quotes(val)) # None if invalid
pairs.append(("version", "0"))
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
"""Return True if text is a host domain name."""
# This may well be wrong. Which RFC is HDN defined in, if any (for
# the purposes of RFC 2965)?
# For the current implementation, what about IPv6? Remember to look
# at other uses of IPV4_RE also, if change this.
if text[0] == "." or text[-1] == ".":
"""Return True if domain A domain-matches domain B, according to RFC 2965.
A and B may be host domain names or IP addresses.
Host names can be specified either as an IP address or a HDN string.
Sometimes we compare one host name with another. (Such comparisons SHALL
be case-insensitive.) Host A's name domain-matches host B's if
* their host name strings string-compare equal; or
* A is a HDN string and has the form NB, where N is a non-empty
name string, B has the form .B', and B' is a HDN string. (So,
x.y.com domain-matches .Y.com but not Y.com.)
Note that domain-match is not a commutative operation: a.b.c.com
domain-matches .c.com, but not the reverse.
# Note that, if A or B are IP addresses, the only relevant part of the
# definition of the domain-match algorithm is the direct string-compare.
# A does not have form NB, or N is the empty string
if not B.startswith("."):
def liberal_is_HDN(text):
"""Return True if text is a sort-of-like a host domain name.
For accepting/blocking domains.
def user_domain_match(A, B):
"""For blocking/accepting domains.
A and B may be host domain names or IP addresses.
if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
initial_dot = B.startswith(".")
if initial_dot and A.endswith(B):
if not initial_dot and A == B:
cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
"""Return request-host, as defined by RFC 2965.
Variation from RFC: returned value is lowercased, for convenient
url = request.get_full_url()
host = urllib.parse.urlparse(url)[1]
host = request.get_header("Host", "")
# remove port, if present
host = cut_port_re.sub("", host, 1)
def eff_request_host(request):
"""Return a tuple (request-host, effective request-host name).
As defined by RFC 2965, except both are lowercased.
erhn = req_host = request_host(request)
if req_host.find(".") == -1 and not IPV4_RE.search(req_host):
erhn = req_host + ".local"
def request_path(request):
"""Path component of request-URI, as defined by RFC 2965."""
url = request.get_full_url()
parts = urllib.parse.urlsplit(url)
path = escape_path(parts.path)
if not path.startswith("/"):
# fix bad RFC 2396 absoluteURI
def request_port(request):
_debug("nonnumeric port: '%s'", port)
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
return "%%%s" % match.group(1).upper()
"""Escape any invalid characters in HTTP URL, and uppercase all escapes."""
# There's no knowing what character encoding was used to create URLs
# containing %-escapes, but since we have to pick one to escape invalid
# path characters, we pick UTF-8, as recommended in the HTML 4.0
# http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
# And here, kind of: draft-fielding-uri-rfc2396bis-03
# (And in draft IRI specification: draft-duerst-iri-05)
# (And here, for new URI schemes: RFC 2718)
path = urllib.parse.quote(path, HTTP_PATH_SAFE)
path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)
"""Return reach of host h, as defined by RFC 2965, section 1.
The reach R of a host name H is defined as follows:
- H is the host domain name of a host; and,
- H has the form A.B; and
- A has no embedded (that is, interior) dots; and
- B has at least one embedded dot, or B is the string "local".
then the reach of H is .B.
* Otherwise, the reach of H is H.
>>> reach("www.acme.com")
#a = h[:i] # this line is only here to show what a is
if is_HDN(h) and (i >= 0 or b == "local"):
def is_third_party(request):
An unverifiable transaction is to a third-party host if its request-
host U does not domain-match the reach R of the request-host O in the
req_host = request_host(request)
if not domain_match(req_host, reach(request.origin_req_host)):
This class represents both Netscape and RFC 2965 cookies.
This is deliberately a very simple class. It just holds attributes. It's
possible to construct Cookie instances that don't comply with the cookie
standards. CookieJar.make_cookies is the factory function for Cookie
objects -- it deals with cookie parsing, supplying defaults, and
normalising to the representation used in this class. CookiePolicy is
responsible for checking them to see whether they should be accepted from
and returned to the server.
Note that the port may be present in the headers, but unspecified ("Port"
rather than"Port=80", for example); if this is the case, port is None.
def __init__(self, version, name, value,
domain, domain_specified, domain_initial_dot,
if version is not None: version = int(version)
if expires is not None: expires = int(float(expires))
if port is None and port_specified is True:
raise ValueError("if port is None, port_specified must be false")
self.port_specified = port_specified
# normalise case, as per RFC 2965 section 3.3.3
self.domain = domain.lower()
self.domain_specified = domain_specified
# Sigh. We need to know whether the domain given in the
# cookie-attribute had an initial dot, in order to follow RFC 2965
# (as clarified in draft errata). Needed for the returned $Domain
self.domain_initial_dot = domain_initial_dot
self.path_specified = path_specified
self.comment_url = comment_url
self._rest = copy.copy(rest)
def has_nonstandard_attr(self, name):
return name in self._rest
def get_nonstandard_attr(self, name, default=None):
return self._rest.get(name, default)
def set_nonstandard_attr(self, name, value):
def is_expired(self, now=None):
if now is None: now = time.time()
if (self.expires is not None) and (self.expires <= now):
if self.port is None: p = ""
limit = self.domain + p + self.path
if self.value is not None:
namevalue = "%s=%s" % (self.name, self.value)
return "<Cookie %s for %s>" % (namevalue, limit)
for name in ("version", "name", "value",
"port", "port_specified",
"domain", "domain_specified", "domain_initial_dot",
"path", "path_specified",
"secure", "expires", "discard", "comment", "comment_url",
attr = getattr(self, name)
args.append("%s=%s" % (name, repr(attr)))
args.append("rest=%s" % repr(self._rest))
args.append("rfc2109=%s" % repr(self.rfc2109))
return "%s(%s)" % (self.__class__.__name__, ", ".join(args))
"""Defines which cookies get accepted from and returned to server.
May also modify cookies, though this is probably a bad idea.
The subclass DefaultCookiePolicy defines the standard rules for Netscape
and RFC 2965 cookies -- override that if you want a customized policy.
def set_ok(self, cookie, request):
"""Return true if (and only if) cookie should be accepted from server.
Currently, pre-expired cookies never get this far -- the CookieJar
class deletes such cookies itself.
raise NotImplementedError()
def return_ok(self, cookie, request):
"""Return true if (and only if) cookie should be returned to server."""
raise NotImplementedError()
def domain_return_ok(self, domain, request):
"""Return false if cookies should not be returned, given cookie domain.
def path_return_ok(self, path, request):
"""Return false if cookies should not be returned, given cookie path.
class DefaultCookiePolicy(CookiePolicy):
"""Implements the standard rules for accepting and returning cookies."""
DomainStrictNonDomain = 2
DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
blocked_domains=None, allowed_domains=None,
netscape=True, rfc2965=False,
rfc2109_as_netscape=None,
strict_rfc2965_unverifiable=True,
strict_ns_unverifiable=False,
strict_ns_domain=DomainLiberal,
strict_ns_set_initial_dollar=False,
strict_ns_set_path=False,
secure_protocols=("https", "wss")
"""Constructor arguments should be passed as keyword arguments only."""
self.rfc2109_as_netscape = rfc2109_as_netscape
self.hide_cookie2 = hide_cookie2
self.strict_domain = strict_domain
self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
self.strict_ns_unverifiable = strict_ns_unverifiable
self.strict_ns_domain = strict_ns_domain
self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
self.strict_ns_set_path = strict_ns_set_path
self.secure_protocols = secure_protocols
if blocked_domains is not None:
self._blocked_domains = tuple(blocked_domains)
self._blocked_domains = ()
if allowed_domains is not None:
allowed_domains = tuple(allowed_domains)
self._allowed_domains = allowed_domains
def blocked_domains(self):
"""Return the sequence of blocked domains (as a tuple)."""
return self._blocked_domains
def set_blocked_domains(self, blocked_domains):
"""Set the sequence of blocked domains."""
self._blocked_domains = tuple(blocked_domains)
def is_blocked(self, domain):
for blocked_domain in self._blocked_domains:
if user_domain_match(domain, blocked_domain):
def allowed_domains(self):
"""Return None, or the sequence of allowed domains (as a tuple)."""
return self._allowed_domains
def set_allowed_domains(self, allowed_domains):
"""Set the sequence of allowed domains, or None."""
if allowed_domains is not None:
allowed_domains = tuple(allowed_domains)
self._allowed_domains = allowed_domains
def is_not_allowed(self, domain):
if self._allowed_domains is None:
for allowed_domain in self._allowed_domains:
if user_domain_match(domain, allowed_domain):
def set_ok(self, cookie, request):
If you override .set_ok(), be sure to call this method. If it returns
false, so should your subclass (assuming your subclass wants to be more
strict about which cookies to accept).
_debug(" - checking cookie %s=%s", cookie.name, cookie.value)
assert cookie.name is not None
for n in "version", "verifiability", "name", "path", "domain", "port":
fn = getattr(self, fn_name)
if not fn(cookie, request):
def set_ok_version(self, cookie, request):
if cookie.version is None:
# Version is always set to 0 by parse_ns_headers if it's a Netscape
# cookie, so this must be an invalid RFC 2965 cookie.
_debug(" Set-Cookie2 without version attribute (%s=%s)",
cookie.name, cookie.value)
if cookie.version > 0 and not self.rfc2965:
_debug(" RFC 2965 cookies are switched off")
elif cookie.version == 0 and not self.netscape:
_debug(" Netscape cookies are switched off")
def set_ok_verifiability(self, cookie, request):
if request.unverifiable and is_third_party(request):
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during "
"unverifiable transaction")
elif cookie.version == 0 and self.strict_ns_unverifiable:
_debug(" third-party Netscape cookie during "
"unverifiable transaction")
def set_ok_name(self, cookie, request):
# Try and stop servers setting V0 cookies designed to hack other
# servers that know both V0 and V1 protocols.
if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
cookie.name.startswith("$")):
_debug(" illegal name (starts with '$'): '%s'", cookie.name)
def set_ok_path(self, cookie, request):
if cookie.path_specified:
req_path = request_path(request)
if ((cookie.version > 0 or