(cookie.version == 0 and self.strict_ns_set_path)) and
not self.path_return_ok(cookie.path, request)):
_debug(" path attribute %s is not a prefix of request "
"path %s", cookie.path, req_path)
# Domain-related checks applied when deciding whether a cookie may be set.
# NOTE(review): as it stands this body is flattened and incomplete -- `domain`,
# `i`, `sld` and `tld` are read but never bound here, several _debug(...)
# calls stop before their arguments, and no branch returns a value.
# Confirm against the full implementation before relying on it.
def set_ok_domain(self, cookie, request):
# User-configured block-list / allow-list checks on the cookie's domain.
if self.is_blocked(cookie.domain):
_debug(" domain %s is in user block-list", cookie.domain)
if self.is_not_allowed(cookie.domain):
_debug(" domain %s is not in user allow-list", cookie.domain)
# The remaining checks only concern cookies flagged as domain_specified.
if cookie.domain_specified:
req_host, erhn = eff_request_host(request)
# Strict mode: look more closely at domains containing two or more dots.
if self.strict_domain and (domain.count(".") >= 2):
# XXX This should probably be compared with the Konqueror
# (kcookiejar.cpp) and Mozilla implementations, but it's a
j = domain.rfind(".", 0, i)
if j == 0: # domain like .foo.bar
# A generic second-level label combined with a two-character top-level
# label is reported as a country-code second level domain.
if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
"gov", "mil", "int", "aero", "biz", "cat", "coop",
"info", "jobs", "mobi", "museum", "name", "pro",
"travel", "eu") and len(tld) == 2:
_debug(" country-code second level domain %s", domain)
# Drop one leading dot, then look for a dot inside what remains;
# ".local" is the only dot-less domain that is not reported.
if domain.startswith("."):
undotted_domain = domain[1:]
embedded_dots = (undotted_domain.find(".") >= 0)
if not embedded_dots and domain != ".local":
_debug(" non-local domain %s contains no embedded dot",
# The effective request-host must end with the cookie domain, either as
# is or after prepending a dot to it.
if (not erhn.endswith(domain) and
(not erhn.startswith(".") and
not ("."+erhn).endswith(domain))):
_debug(" effective request-host %s (even with added "
"initial dot) does not end with %s",
# Versioned cookies, or any cookie when the DomainRFC2965Match bit is set
# in strict_ns_domain, must also satisfy domain_match().
if (cookie.version > 0 or
(self.strict_ns_domain & self.DomainRFC2965Match)):
if not domain_match(erhn, domain):
_debug(" effective request-host %s does not domain-match "
# Versioned cookies, or any cookie when the DomainStrictNoDots bit is set:
# the part of the request host left after removing len(domain) trailing
# characters must not contain a dot, unless IPV4_RE matches the host.
if (cookie.version > 0 or
(self.strict_ns_domain & self.DomainStrictNoDots)):
host_prefix = req_host[:-len(domain)]
if (host_prefix.find(".") >= 0 and
not IPV4_RE.search(req_host)):
_debug(" host prefix %s for domain %s contains a dot",
# Port check applied when deciding whether a cookie may be set.
# NOTE(review): the loop body and the last _debug(...) call are truncated
# here, and no value is returned -- confirm against the full implementation.
def set_ok_port(self, cookie, request):
# Only cookies flagged as port_specified are examined.
if cookie.port_specified:
req_port = request_port(request)
# cookie.port is handled as a comma-separated list of ports.
for p in cookie.port.split(","):
_debug(" bad port %s (not numeric)", p)
_debug(" request port (%s) not found in %s",
def return_ok(self, cookie, request):
    """Return true if *cookie* may be sent back with *request*.

    If you override .return_ok(), be sure to call this method.  If it
    returns false, so should your subclass (assuming your subclass wants to
    be more strict about which cookies to return).

    Each ``return_ok_<name>`` check is run in the order listed below and
    evaluation stops at the first one that reports a failure.
    """
    # Path has already been checked by .path_return_ok(), and domain
    # blocking done by .domain_return_ok().
    _debug(" - checking cookie %s=%s", cookie.name, cookie.value)

    for n in "version", "verifiability", "secure", "expires", "port", "domain":
        # Dispatch by name to the matching return_ok_<n> method.
        fn = getattr(self, "return_ok_" + n)
        if not fn(cookie, request):
            return False
    return True
def return_ok_version(self, cookie, request):
    """Return true if the cookie's protocol version is enabled.

    Cookies with ``version > 0`` require ``self.rfc2965`` to be true and
    cookies with ``version == 0`` require ``self.netscape`` to be true.
    *request* is accepted for signature parity with the other
    ``return_ok_*`` checks and is not consulted.
    """
    if cookie.version > 0 and not self.rfc2965:
        _debug(" RFC 2965 cookies are switched off")
        return False
    elif cookie.version == 0 and not self.netscape:
        _debug(" Netscape cookies are switched off")
        return False
    # An explicit True matters: the caller treats any falsy result
    # (including an implicit None) as a rejection.
    return True
# Check applied to third-party requests that are marked unverifiable.
# NOTE(review): both _debug(...) calls are cut off mid-string here and no
# value is returned -- confirm against the full implementation.
def return_ok_verifiability(self, cookie, request):
# Only relevant when the request is unverifiable AND is_third_party() holds.
if request.unverifiable and is_third_party(request):
# Versioned cookies are governed by strict_rfc2965_unverifiable ...
if cookie.version > 0 and self.strict_rfc2965_unverifiable:
_debug(" third-party RFC 2965 cookie during unverifiable "
# ... and version-0 cookies by strict_ns_unverifiable.
elif cookie.version == 0 and self.strict_ns_unverifiable:
_debug(" third-party Netscape cookie during unverifiable "
def return_ok_secure(self, cookie, request):
    """Return true unless a secure cookie would go over a non-secure request.

    A cookie whose ``secure`` attribute is true is only acceptable when
    ``request.type`` is one of ``self.secure_protocols``.
    """
    if cookie.secure and request.type not in self.secure_protocols:
        _debug(" secure cookie with non-secure request")
        return False
    # An explicit True matters: the caller treats any falsy result
    # (including an implicit None) as a rejection.
    return True
def return_ok_expires(self, cookie, request):
    """Return true unless the cookie reports itself expired at ``self._now``.

    *request* is accepted for signature parity with the other
    ``return_ok_*`` checks and is not consulted.
    """
    if cookie.is_expired(self._now):
        _debug(" cookie expired")
        return False
    # An explicit True matters: the caller treats any falsy result
    # (including an implicit None) as a rejection.
    return True
# Port check applied when deciding whether a cookie may be returned.
# NOTE(review): the loop body and the _debug(...) call are truncated here,
# and no value is returned -- confirm against the full implementation.
def return_ok_port(self, cookie, request):
req_port = request_port(request)
# cookie.port is handled as a comma-separated list of ports.
for p in cookie.port.split(","):
_debug(" request port %s does not match cookie port %s",
# Domain checks applied when deciding whether a cookie may be returned.
# NOTE(review): `domain` and `dotdomain` are read but never bound in this
# body, the final _debug(...) call is cut off, and no value is returned --
# confirm against the full implementation.
def return_ok_domain(self, cookie, request):
req_host, erhn = eff_request_host(request)
if domain and not domain.startswith("."):
# strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
# Applies to version-0 cookies when the DomainStrictNonDomain bit is set and
# the cookie is not flagged domain_specified: plain string comparison
# against the effective request-host.
if (cookie.version == 0 and
(self.strict_ns_domain & self.DomainStrictNonDomain) and
not cookie.domain_specified and domain != erhn):
_debug(" cookie with unspecified domain does not string-compare "
"equal to request domain")
# Versioned cookies must satisfy domain_match().
if cookie.version > 0 and not domain_match(erhn, domain):
_debug(" effective request-host name %s does not domain-match "
"RFC 2965 cookie domain %s", erhn, domain)
# Version-0 cookies use a suffix test on the dot-prefixed request host.
if cookie.version == 0 and not ("."+erhn).endswith(dotdomain):
_debug(" request-host %s does not match Netscape cookie domain "
# Quick domain-level pre-filter used before individual cookies are examined.
# NOTE(review): `dotdomain` is read but never bound in this body, several
# `if` headers have no visible body, and no value is returned -- confirm
# against the full implementation.
def domain_return_ok(self, domain, request):
# Liberal check of the domain. This is here as an optimization to avoid
# having to load lots of MSIE cookie files unless necessary.
req_host, erhn = eff_request_host(request)
if not req_host.startswith("."):
if not erhn.startswith("."):
if domain and not domain.startswith("."):
# Suffix test: either form of the request host must end with dotdomain.
if not (req_host.endswith(dotdomain) or erhn.endswith(dotdomain)):
#_debug(" request domain %s does not match cookie domain %s",
# User-configured block-list / allow-list checks.
if self.is_blocked(domain):
_debug(" domain %s is in user block-list", domain)
if self.is_not_allowed(domain):
_debug(" domain %s is not in user allow-list", domain)
# Path-level pre-filter: does the request path fall under the cookie path?
# NOTE(review): the `elif` below has no preceding `if` in this body,
# `pathlen` is read but never bound, and no value is returned -- confirm
# against the full implementation.
def path_return_ok(self, path, request):
_debug("- checking cookie path=%s", path)
req_path = request_path(request)
# Prefix match that must end on a "/" boundary: either `path` itself ends
# with "/", or the character at index `pathlen` of the request path is "/".
elif (req_path.startswith(path) and
(path.endswith("/") or req_path[pathlen:pathlen+1] == "/")):
_debug(" %s does not path-match %s", req_path, path)
def vals_sorted_by_key(adict):
    """Return an iterator over the values of *adict*, ordered by sorted key.

    The result is a lazy ``map`` object rather than a list, so it can be
    consumed only once.
    """
    keys = sorted(adict.keys())
    return map(adict.get, keys)
# NOTE(review): these lines look like the body of a recursive generator
# (the recursive call below names it `deepvalues`), but its `def` line and the
# loop that binds `obj` are not present here -- confirm against the full file.
"""Iterates over nested mapping, depth-first, in sorted order by key."""
# Visit the values of this level in key order.
values = vals_sorted_by_key(mapping)
# Recurse into a nested value and re-yield everything it produces.
yield from deepvalues(obj)
# Used as second parameter to dict.get() method, to distinguish absent
# dict key from one with a None value.
"""Collection of HTTP cookies.
You may not need to know about this class: try
urllib.request.build_opener(HTTPCookieProcessor).open(url).
# Pre-compiled patterns.
# Matches any single non-word character.
non_word_re = re.compile(r"\W")
# Captures a double quote or a backslash.
quote_re = re.compile(r"([\"\\])")
# Optional leading dot followed by a run of non-dot characters.
strict_domain_re = re.compile(r"\.?[^.]*")
# A run of non-dot characters.
domain_re = re.compile(r"[^.]*")
# One or more dots at the start of the string.
dots_re = re.compile(r"^\.+")
# "#LWP-Cookies-<digits>.<digits>" at the start of the string; the version
# number is captured and \d is restricted to ASCII digits.
magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
# Constructor; `policy` is optional.
# NOTE(review): as written the DefaultCookiePolicy() assignment is
# unconditional and `policy` is never stored on the instance, although other
# methods read self._policy and self._cookies -- confirm against the full
# implementation.
def __init__(self, policy=None):
policy = DefaultCookiePolicy()
# Re-entrant lock, acquired by add_cookie_header().
self._cookies_lock = _threading.RLock()
# NOTE(review): only the signature is present here; the body is missing.
# Presumably it stores `policy` where other methods read self._policy --
# confirm against the full implementation.
def set_policy(self, policy):
# Walk the cookies stored under `domain`, consulting the policy at the
# domain, path and individual-cookie level.
# NOTE(review): the bodies of the policy checks (and any result
# accumulation / return) are not present here -- confirm against the full
# implementation.
def _cookies_for_domain(self, domain, request):
# Cheap domain-level filter first.
if not self._policy.domain_return_ok(domain, request):
_debug("Checking %s for cookies to return", domain)
# self._cookies is indexed by domain, then path, then cookie name.
cookies_by_path = self._cookies[domain]
for path in cookies_by_path.keys():
if not self._policy.path_return_ok(path, request):
cookies_by_name = cookies_by_path[path]
for cookie in cookies_by_name.values():
# Full per-cookie policy check.
if not self._policy.return_ok(cookie, request):
_debug(" not returning cookie")
def _cookies_for_request(self, request):
"""Return a list of cookies to be returned to server."""
for domain in self._cookies.keys():
cookies.extend(self._cookies_for_domain(domain, request))
# Build the list of strings that make up a Cookie header for `cookies`.
# Note that `cookies` is sorted in place.
# NOTE(review): `attrs`, `version`, `value`, `domain` and `p` are read but
# never bound in this body, the docstring is unterminated, one `if (` is cut
# off, and nothing is returned -- confirm against the full implementation.
def _cookie_attrs(self, cookies):
"""Return a list of cookie-attributes to be returned to server.
like ['foo="bar"; $Path="/"', ...]
The $Version attribute is also added when appropriate (currently only
# add cookies in order of most specific (ie. longest) path first
cookies.sort(key=lambda a: len(a.path), reverse=True)
# set version of Cookie header
# What should it be if multiple matching Set-Cookie headers have
# different versions themselves?
# Answer: there is no answer; was supposed to be settled by
# RFC 2965 errata, but that may never appear...
attrs.append("$Version=%s" % version)
# quote cookie value if necessary
# (not for Netscape protocol, which already has any quotes
# intact, due to the poorly-specified Netscape Cookie: syntax)
if ((cookie.value is not None) and
self.non_word_re.search(cookie.value) and version > 0):
# Backslash-escape every double quote and backslash in the value.
value = self.quote_re.sub(r"\\\1", cookie.value)
# add cookie-attributes to be returned in Cookie header
attrs.append(cookie.name)
attrs.append("%s=%s" % (cookie.name, value))
if cookie.path_specified:
attrs.append('$Path="%s"' % cookie.path)
if cookie.domain.startswith("."):
if (not cookie.domain_initial_dot and
attrs.append('$Domain="%s"' % domain)
if cookie.port is not None:
# The port list is appended in quotes only for port_specified cookies.
if cookie.port_specified:
p = p + ('="%s"' % cookie.port)
def add_cookie_header(self, request):
    """Add correct Cookie: header to request (urllib.request.Request object).

    The Cookie2 header is also added unless policy.hide_cookie2 is true.

    Headers already present on *request* are never overwritten.  Expired
    cookies are cleared after the lock has been released.
    """
    _debug("add_cookie_header")
    # Hold the lock via a context manager so it is released even when a
    # policy callback or the request object raises.
    with self._cookies_lock:
        # Policy and jar share one timestamp for this pass.
        self._policy._now = self._now = int(time.time())

        cookies = self._cookies_for_request(request)

        attrs = self._cookie_attrs(cookies)
        # Skip the header entirely when there is nothing to send, rather
        # than emitting an empty "Cookie:" value.
        if attrs and not request.has_header("Cookie"):
            request.add_unredirected_header(
                "Cookie", "; ".join(attrs))

        # if necessary, advertise that we know RFC 2965
        if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
                not request.has_header("Cookie2")):
            request.add_unredirected_header("Cookie2", '$Version="1"')

    self.clear_expired_cookies()
# Convert parsed Set-Cookie / Set-Cookie2 attribute lists into
# (name, value, standard, rest) tuples.
# NOTE(review): `lc`, `standard`, `rest` and `cookie_tuples` are read but
# never bound in this body, the docstring and the `value_attrs` tuple are
# unterminated, most branch bodies are missing, and nothing is returned --
# confirm against the full implementation.
def _normalized_cookie_tuples(self, attrs_set):
"""Return list of tuples containing normalised cookie information.
attrs_set is the list of lists of key,value pairs extracted from
the Set-Cookie or Set-Cookie2 headers.
Tuples are name, value, standard, rest, where name and value are the
cookie name and value, standard is a dictionary containing the standard
cookie-attributes (discard, secure, version, expires or max-age,
domain, path and port) and rest is a dictionary containing the rest of
boolean_attrs = "discard", "secure"
value_attrs = ("version",
"domain", "path", "port",
for cookie_attrs in attrs_set:
# The first pair of each list is the cookie's own name and value.
name, value = cookie_attrs[0]
# Build dictionary of standard cookie-attributes (standard) and
# dictionary of other cookie-attributes (rest).
# Note: expiry time is normalised to seconds since epoch. V0
# cookies should have the Expires cookie-attribute, and V1 cookies
# should have Max-Age, but since V1 includes RFC 2109 cookies (and
# since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
# accept either (but prefer Max-Age).
for k, v in cookie_attrs[1:]:
# don't lose case distinction for unknown fields
if lc in value_attrs or lc in boolean_attrs:
if k in boolean_attrs and v is None:
# boolean cookie-attribute is present, but has no value
# (like "discard", rather than "port=80")
# only first value is significant
_debug(" missing value for domain attribute")
# Prefer max-age to expires (like Mozilla)
_debug(" missing or invalid value for expires "
"attribute: treating as session cookie")
_debug(" missing or invalid (non-numeric) value for "
# convert RFC 2965 Max-Age to seconds since epoch
# XXX Strictly you're supposed to follow RFC 2616
# age-calculation rules. Remember that zero Max-Age
# is a request to discard (old and new) cookie, though.
if (k in value_attrs) or (k in boolean_attrs):
k not in ("port", "comment", "commenturl")):
_debug(" missing value for %s attribute" % k)
cookie_tuples.append((name, value, standard, rest))
def _cookie_from_cookie_tuple(self, tup, request):
# standard is dict of standard cookie-attributes, rest is dict of the
name, value, standard, rest = tup
domain = standard.get("domain", Absent)
path = standard.get("path", Absent)
port = standard.get("port", Absent)
expires = standard.get("expires", Absent)
version = standard.get("version", None)
return None # invalid version, ignore cookie
secure = standard.get("secure", False)