# (discard is also set if expires is Absent)
discard = standard.get("discard", False)
comment = standard.get("comment", None)
comment_url = standard.get("commenturl", None)
if path is not Absent and path != "":
path = request_path(request)
# Netscape spec parts company from reality here
if len(path) == 0: path = "/"
domain_specified = domain is not Absent
# but first we have to remember whether it starts with a dot
domain_initial_dot = False
domain_initial_dot = bool(domain.startswith("."))
req_host, erhn = eff_request_host(request)
elif not domain.startswith("."):
# Port attr present, but has no value: default to request port.
# Cookie should then only be sent back on that port.
port = request_port(request)
port = re.sub(r"\s+", "", port)
# No port attr present. Cookie can be sent back on any port.
# set default expires and discard
elif expires <= self._now:
# Expiry date in past is request to delete cookie. This can't be
# in DefaultCookiePolicy, because can't delete cookies there.
self.clear(domain, path, name)
_debug("Expiring cookie, domain='%s', path='%s', name='%s'",
domain, domain_specified, domain_initial_dot,
def _cookies_from_attrs_set(self, attrs_set, request):
    """Build cookies from a parsed set of cookie attributes.

    attrs_set is normalised with self._normalized_cookie_tuples() and each
    resulting tuple is handed to self._cookie_from_cookie_tuple() together
    with request.  Tuples for which that call returns a false value are
    dropped.

    Returns a (possibly empty) list of the cookies that were created.
    """
    cookie_tuples = self._normalized_cookie_tuples(attrs_set)

    cookies = []
    for tup in cookie_tuples:
        cookie = self._cookie_from_cookie_tuple(tup, request)
        if cookie:
            cookies.append(cookie)
    return cookies
def _process_rfc2109_cookies(self, cookies):
# Work out whether RFC 2109 cookies should be handled as Netscape cookies.
# An explicit `rfc2109_as_netscape` attribute on the policy is used when it
# is present and not None; otherwise the flag is the negation of
# `self._policy.rfc2965`.
# NOTE(review): the statements that consume `rfc2109_as_ns` and `cookies`
# are not visible in this excerpt -- confirm against the full file.
rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
if rfc2109_as_ns is None:
rfc2109_as_ns = not self._policy.rfc2965
# treat 2109 cookies as Netscape cookies rather than
# (the comment above is cut off at this point in the excerpt)
def make_cookies(self, response, request):
"""Return sequence of Cookie objects extracted from response object."""
# NOTE(review): this excerpt has lost its indentation and appears to be
# missing lines (`lookup` is used without a visible assignment, the nested
# helper has no visible return, and no final return is shown).  The comments
# below describe only the statements that are actually present.
# get cookie-attributes for RFC 2965 and Netscape protocols
headers = response.info()
rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
ns_hdrs = headers.get_all("Set-Cookie", [])
# Refresh the whole-second timestamp shared by the jar and its policy.
self._policy._now = self._now = int(time.time())
rfc2965 = self._policy.rfc2965
netscape = self._policy.netscape
# Nothing to do when there is no header for any protocol the policy enables.
if ((not rfc2965_hdrs and not ns_hdrs) or
(not ns_hdrs and not rfc2965) or
(not rfc2965_hdrs and not netscape) or
(not netscape and not rfc2965)):
return [] # no relevant cookie headers: quick exit
# Cookies built from the Set-Cookie2 header values.
cookies = self._cookies_from_attrs_set(
split_header_words(rfc2965_hdrs), request)
# presumably the body of an exception handler whose try/except lines are
# not shown here -- TODO confirm
_warn_unhandled_exception()
# RFC 2109 and Netscape cookies
ns_cookies = self._cookies_from_attrs_set(
parse_ns_headers(ns_hdrs), request)
# presumably also inside an exception handler -- TODO confirm
_warn_unhandled_exception()
self._process_rfc2109_cookies(ns_cookies)
# Look for Netscape cookies (from Set-Cookie headers) that match
# corresponding RFC 2965 cookies (from Set-Cookie2 headers).
# For each match, keep the RFC 2965 cookie and ignore the Netscape
# cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
# bundled in with the Netscape cookies for this purpose, which is
# (the comment above is cut off at this point in the excerpt)
# Index entries by (domain, path, name); only the keys matter, the value is
# always None.  The loop providing `cookie` is not visible here.
lookup[(cookie.domain, cookie.path, cookie.name)] = None
# Filter predicate; `lookup` is bound as a default argument at definition
# time.  NOTE(review): the return statement is not visible in this excerpt.
def no_matching_rfc2965(ns_cookie, lookup=lookup):
key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
# filter() is lazy: ns_cookies becomes an iterator, not a list.
ns_cookies = filter(no_matching_rfc2965, ns_cookies)
cookies.extend(ns_cookies)
def set_cookie_if_ok(self, cookie, request):
    """Set a cookie if policy says it's OK to do so.

    The timestamp shared by the jar and its policy is refreshed first; the
    cookie is then stored with self.set_cookie() only when
    self._policy.set_ok(cookie, request) returns a true value.
    Returns None.
    """
    # `with` releases the lock even when the policy check or the store raises.
    with self._cookies_lock:
        self._policy._now = self._now = int(time.time())

        if self._policy.set_ok(cookie, request):
            self.set_cookie(cookie)
def set_cookie(self, cookie):
    """Set a cookie, without checking whether or not it should be set.

    The cookie is stored in the nested self._cookies mapping under
    [cookie.domain][cookie.path][cookie.name], replacing any cookie that
    is already stored under the same three keys.
    """
    with self._cookies_lock:
        # Create the per-domain and per-path levels on first use.
        by_path = self._cookies.setdefault(cookie.domain, {})
        by_name = by_path.setdefault(cookie.path, {})
        by_name[cookie.name] = cookie
def extract_cookies(self, response, request):
    """Extract cookies from response, where allowable given the request.

    Every cookie produced by self.make_cookies(response, request) that the
    policy accepts via set_ok() is stored with self.set_cookie().
    Returns None.
    """
    _debug("extract_cookies: %s", response.info())
    # `with` releases the lock even when parsing or the policy check raises.
    with self._cookies_lock:
        for cookie in self.make_cookies(response, request):
            if self._policy.set_ok(cookie, request):
                _debug(" setting cookie: %s", cookie)
                self.set_cookie(cookie)
def clear(self, domain=None, path=None, name=None):
    """Clear some cookies.

    Invoking this method without arguments will clear all cookies.  If
    given a single argument, only cookies belonging to that domain will be
    removed.  If given two arguments, cookies belonging to the specified
    path within that domain are removed.  If given three arguments, then
    the cookie with the specified name, path and domain is removed.

    Raises KeyError if no matching cookie exists.
    Raises ValueError if name is given without both domain and path, or if
    path is given without domain.
    """
    if name is not None:
        if (domain is None) or (path is None):
            raise ValueError(
                "domain and path must be given to remove a cookie by name")
        del self._cookies[domain][path][name]
    elif path is not None:
        if domain is None:
            raise ValueError(
                "domain must be given to remove cookies by path")
        del self._cookies[domain][path]
    elif domain is not None:
        del self._cookies[domain]
    else:
        self._cookies = {}
def clear_session_cookies(self):
    """Discard all session cookies.

    A cookie is removed when its `discard` attribute is true.

    Note that the .save() method won't save session cookies anyway, unless
    you ask otherwise by passing a true ignore_discard argument.
    """
    with self._cookies_lock:
        # Snapshot first: clear() mutates the mapping being iterated.
        for cookie in list(self):
            if cookie.discard:
                self.clear(cookie.domain, cookie.path, cookie.name)
def clear_expired_cookies(self):
    """Discard all expired cookies.

    You probably don't need to call this method: expired cookies are never
    sent back to the server (provided you're using DefaultCookiePolicy),
    this method is called by CookieJar itself every so often, and the
    .save() method won't save expired cookies anyway (unless you ask
    otherwise by passing a true ignore_expires argument).
    """
    with self._cookies_lock:
        # One timestamp for the whole sweep so every cookie is judged
        # against the same instant.
        now = time.time()
        # Snapshot first: clear() mutates the mapping being iterated.
        for cookie in list(self):
            if cookie.is_expired(now):
                self.clear(cookie.domain, cookie.path, cookie.name)
# NOTE(review): the `def` lines for the statements below are not visible in
# this excerpt.  They look like the bodies of the jar's iteration, length and
# string-conversion special methods -- confirm against the full file.
# Hands the nested self._cookies mapping to deepvalues() and returns the result.
return deepvalues(self._cookies)
"""Return number of contained cookies."""
# Counts by iterating over the jar; the initialisation of `i` and the final
# return are not shown here.
for cookie in self: i = i + 1
# Collects repr() of every cookie; `r` is initialised outside the shown lines.
for cookie in self: r.append(repr(cookie))
# Rendered as "<ClassName[item, item, ...]>".
return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
# Same layout as above, but built from str() of every cookie.
for cookie in self: r.append(str(cookie))
return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
# derives from OSError for backwards-compatibility with Python 2.4.0
class LoadError(OSError):
    # Adds nothing to OSError; raised in this file when a cookies file is
    # not in the expected format.
    pass
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """Create the jar without reading any file.

        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename: default path for load() and revert(); converted with
            os.fspath() when given.  May be None, in which case a filename
            has to be passed to those methods explicitly.
        delayload: stored as a bool in self.delayload.
        policy: passed unchanged to CookieJar.__init__().
        """
        CookieJar.__init__(self, policy)
        # os.fspath(None) raises TypeError, so only convert a real value.
        if filename is not None:
            filename = os.fspath(filename)
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented in this class; always raises NotImplementedError.
        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        filename defaults to self.filename; ValueError is raised when
        neither is available.  The opened file is handed to
        self._really_load() together with the two ignore_* flags.
        """
        # An explicitly passed filename always wins over the stored default.
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.

        filename defaults to self.filename; ValueError is raised when
        neither is available.
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with self._cookies_lock:
            # Keep a private copy so a failed load can be rolled back.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except OSError:
                self._cookies = old_state
                raise
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    Actually, the format is extended a bit -- see module docstring.

    The cookie's name/value pair comes first, followed by its standard
    attributes, then the entries of cookie._rest in sorted key order, and
    finally the version.  The pairs are joined with join_header_words().
    """
    h = [(cookie.name, cookie.value),
         # The loader in this file reads a "path" attribute back, so it has
         # to be written here as well.
         ("path", cookie.path),
         ("domain", cookie.domain)]
    if cookie.port is not None:
        h.append(("port", cookie.port))
    # Flag attributes are written without a value.
    if cookie.path_specified:
        h.append(("path_spec", None))
    if cookie.port_specified:
        h.append(("port_spec", None))
    if cookie.domain_initial_dot:
        h.append(("domain_dot", None))
    if cookie.secure:
        h.append(("secure", None))
    if cookie.expires:
        h.append(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        h.append(("discard", None))
    if cookie.comment:
        h.append(("comment", cookie.comment))
    if cookie.comment_url:
        h.append(("commenturl", cookie.comment_url))

    # Sorted so the output does not depend on dict ordering.
    for k in sorted(cookie._rest.keys()):
        h.append((k, str(cookie._rest[k])))

    h.append(("version", str(cookie.version)))

    return join_header_words([h])
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """
def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
    """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

    ignore_discard and ignore_expires: see docstring for FileCookieJar.save

    When ignore_discard is false, cookies whose `discard` attribute is true
    are left out; when ignore_expires is false, cookies that report
    themselves expired are left out.  Every emitted header line ends with a
    newline, so an empty selection yields the empty string.
    """
    now = time.time()
    r = []
    for cookie in self:
        if not ignore_discard and cookie.discard:
            continue
        if not ignore_expires and cookie.is_expired(now):
            continue
        r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
    # The extra empty item gives the last header its terminating newline.
    return "\n".join(r + [""])
def save(self, filename=None, ignore_discard=False, ignore_expires=False):
    """Write the jar's cookies to a file as "Set-Cookie3" lines.

    filename defaults to self.filename; ValueError is raised when neither
    is available.  ignore_discard and ignore_expires are passed through to
    self.as_lwp_str().  The file is opened in "w" mode, so existing content
    is replaced.
    """
    # An explicitly passed filename always wins over the stored default.
    if filename is None:
        if self.filename is None:
            raise ValueError(MISSING_FILENAME_TEXT)
        filename = self.filename

    with open(filename, "w") as f:
        # There really isn't an LWP Cookies 2.0 format, but this indicates
        # that there is extra information in here (domain_dot and
        # port_spec) while still being compatible with libwww-perl, I hope.
        f.write("#LWP-Cookies-2.0\n")
        f.write(self.as_lwp_str(ignore_discard, ignore_expires))
def _really_load(self, f, filename, ignore_discard, ignore_expires):
# NOTE(review): this excerpt has lost its indentation and many lines (the
# assignments of `magic`, `header`, `line`, `lc`, `h`, `now`, `name`, `value`
# and `domain`, the ends of several parenthesised expressions, and the
# loop/try scaffolding).  The comments below describe only what is shown.
# The file is rejected when `magic` does not match the class's magic_re
# pattern (presumably `magic` is the first line of f -- TODO confirm).
if not self.magic_re.search(magic):
msg = ("%r does not look like a Set-Cookie3 (LWP) format "
# Names of the recognised attributes, split into flags and valued
# attributes; both tuple literals are cut off in this excerpt.
boolean_attrs = ("port_spec", "path_spec", "domain_dot",
value_attrs = ("version",
"port", "path", "domain",
# Test for lines that do not begin with `header`; what happens to them is
# not visible here.
if not line.startswith(header):
# Drop the header prefix and surrounding whitespace before parsing.
line = line[len(header):].strip()
for data in split_header_words([line]):
# don't lose case distinction for unknown fields
if (lc in value_attrs) or (lc in boolean_attrs):
# Convert the stored expiry text with iso2time().
expires = iso2time(expires)
# The domain counts as "specified" exactly when it starts with a dot.
domain_specified = domain.startswith(".")
# `h` looks like an attribute lookup callable -- its definition is not shown.
c = Cookie(h("version"), name, value,
h("port"), h("port_spec"),
domain, domain_specified, h("domain_dot"),
h("path"), h("path_spec"),
# Same filtering conditions as used when writing: discardable and expired
# cookies are tested here unless the corresponding ignore_* flag is true.
if not ignore_discard and c.discard:
if not ignore_expires and c.is_expired(now):
# presumably inside an exception handler that converts unexpected errors
# into LoadError -- TODO confirm
_warn_unhandled_exception()
raise LoadError("invalid Set-Cookie3 format file %r: %r" %
class MozillaCookieJar(FileCookieJar):
WARNING: you may want to backup your browser's cookies file if you use
this class to save cookies. I *think* it works, but there have been
This class differs from CookieJar only in the format it uses to save and
load cookies to and from a file. This class uses the Mozilla/Netscape
`cookies.txt' format. lynx uses this file format, too.
Don't expect cookies saved while the browser is running to be noticed by
the browser (in fact, Mozilla on unix will overwrite your saved cookies if
you change them on disk while it's running; on Windows, you probably can't
save at all while the browser is running).
Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
Netscape cookies on saving.
In particular, the cookie version and port number information is lost,
together with information about whether or not Path, Port and Discard were
specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
domain as set in the HTTP header started with a dot (yes, I'm aware some
domains in Netscape files start with a dot and some don't -- trust me, you
really don't want to know any more about this).