Edit File by line

r"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""

[25] Fix | Delete

[26] Fix | Delete

# Names exported by "from <module> import *".
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']

[28] Fix | Delete

[29] Fix | Delete

import os

[30] Fix | Delete

import copy

[31] Fix | Delete

import datetime

[32] Fix | Delete

import re

[33] Fix | Delete

import time

[34] Fix | Delete

import urllib.parse, urllib.request

[35] Fix | Delete

import threading as _threading

[36] Fix | Delete

import http.client # only for the default HTTP port

[37] Fix | Delete

from calendar import timegm

[38] Fix | Delete

[39] Fix | Delete

debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() on first use

def _debug(*args):
    """Forward *args* to the "http.cookiejar" logger at DEBUG level.

    Does nothing (and returns None) unless the module-level ``debug`` flag
    is true.  The logger is looked up on first use and cached in the
    module-level ``logger`` variable; ``logging`` is imported only then.
    """
    global logger
    if not debug:
        return
    if not logger:
        import logging
        logger = logging.getLogger("http.cookiejar")
    return logger.debug(*args)

[50] Fix | Delete

[51] Fix | Delete

[52] Fix | Delete

# The default HTTP port, kept as a string.
DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
# Message text for the case where no filename is available.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")

[55] Fix | Delete

[56] Fix | Delete

def _warn_unhandled_exception():

[57] Fix | Delete

# There are a few catch-all except: statements in this module, for

[58] Fix | Delete

# catching input that's bad in unexpected ways. Warn if any

[59] Fix | Delete

# exceptions are caught there.

[60] Fix | Delete

import io, warnings, traceback

[61] Fix | Delete

f = io.StringIO()

[62] Fix | Delete

traceback.print_exc(None, f)

[63] Fix | Delete

msg = f.getvalue()

[64] Fix | Delete

warnings.warn("http.cookiejar bug!\n%s" % msg, stacklevel=2)

[65] Fix | Delete

[66] Fix | Delete

[67] Fix | Delete

# Date/time conversion

[68] Fix | Delete

# -----------------------------------------------------------------------------

[69] Fix | Delete

[70] Fix | Delete

EPOCH_YEAR = 1970

[71] Fix | Delete

def _timegm(tt):

[72] Fix | Delete

year, month, mday, hour, min, sec = tt[:6]

[73] Fix | Delete

if ((year >= EPOCH_YEAR) and (1 <= month <= 12) and (1 <= mday <= 31) and

[74] Fix | Delete

(0 <= hour <= 24) and (0 <= min <= 59) and (0 <= sec <= 61)):

[75] Fix | Delete

return timegm(tt)

[76] Fix | Delete

else:

[77] Fix | Delete

return None

[78] Fix | Delete

[79] Fix | Delete

# Abbreviated weekday names, Monday first (indexable by datetime.weekday()).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
# Abbreviated month names; index 0 is January.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased copy of MONTHS, for case-insensitive lookups.
MONTHS_LOWER = [month.lower() for month in MONTHS]

[84] Fix | Delete

[85] Fix | Delete

def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Timezone-aware calls replace the deprecated naive-UTC helpers
    # utcnow() / utcfromtimestamp(); the formatted fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

[103] Fix | Delete

[104] Fix | Delete

def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Timezone-aware calls replace the deprecated naive-UTC helpers
    # utcnow() / utcfromtimestamp(); the formatted fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    # Weekday and month names come from the module tables, not the locale.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)

[122] Fix | Delete

[123] Fix | Delete

[124] Fix | Delete

# Timezone names treated as a zero offset (only the keys are used here).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Optional sign, one or two hour digits, optional ":" and two minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)
def offset_from_tz_string(tz):
    """Return the offset in seconds that the timezone string *tz* denotes.

    Names listed in UTC_ZONES give 0.  Numeric forms such as "-0800",
    "+01:30" or "2" are converted to seconds, negative when the sign is "-".
    Any other string gives None.
    """
    if tz in UTC_ZONES:
        return 0
    m = TIMEZONE_RE.search(tz)
    if m is None:
        return None
    sign, hours, minutes = m.groups()
    offset = 3600 * int(hours)
    if minutes:
        offset += 60 * int(minutes)
    return -offset if sign == '-' else offset

[140] Fix | Delete

[141] Fix | Delete

def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date/time fields to seconds since the epoch, or None.

    day, yr     -- day of month and year, as digit strings (or ints)
    mon         -- month, either a three-letter name (any case) or a number
    hr, min, sec -- clock fields; None means 0.  sec may carry a fractional
                   part, which is discarded.
    tz          -- timezone string understood by offset_from_tz_string(), or
                   None for UTC

    None is returned when the year exceeds datetime.MAXYEAR, the month is
    not recognised, a field is out of the range accepted by _timegm(), or
    the timezone string is not recognised.

    (The parameter name "min" shadows the builtin; it is kept because it is
    part of the existing signature.)
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # Seconds may arrive as e.g. "29.5"; int("29.5") would raise ValueError,
    # so go through float and truncate.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: pick the century that puts the year within
        # 50 years of the current one
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t

[196] Fix | Delete

[197] Fix | Delete

# Exact "Wdy, DD Mon YYYY HH:MM:SS GMT" form; groups are day, month name,
# year, hour, minute, second.
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
# Leading weekday name (abbreviated or longer), optional comma, whitespace.
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
# Groups: day, month, year, hour, minute, second, timezone.
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    (?:
       ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+) # timezone
       \s*
    )?
    (?:
       \(\w+\)         # ASCII representation of timezone in parens.
       \s*
    )?$""", re.X | re.ASCII)

[223] Fix | Delete

def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        day, mon_name, yr, hr, minute, sec = m.groups()
        try:
            mon = MONTHS_LOWER.index(mon_name.lower()) + 1
        except ValueError:
            # The strict pattern only checks the shape of the month name
            # (e.g. "Foo" passes); an unknown name is an unrecognized date.
            return None
        # int() rather than float() for the seconds, so the result is an
        # integer as documented.
        return _timegm((int(yr), mon, int(day),
                        int(hr), int(minute), int(sec)))

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, minute, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)

[278] Fix | Delete

[279] Fix | Delete

# Groups: year, month, day, hour, minute, second (possibly fractional),
# timezone, and an inner fragment of the numeric timezone.
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   (?:
      ([-+]?\d\d?:?(:?\d\d)?
       |Z|z)             # timezone  (Z is "zero meridian", i.e. GMT)
      \s*
   )?$""", re.X | re.ASCII)

[297] Fix | Delete

def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    yr, mon, day, hr, minute, sec, tz, _ = m.groups()
    return _str2time(day, mon, yr, hr, minute, sec, tz)

[325] Fix | Delete

[326] Fix | Delete

[327] Fix | Delete

# Header parsing

[328] Fix | Delete

# -----------------------------------------------------------------------------

[329] Fix | Delete

[330] Fix | Delete

def unmatched(match):
    """Return unmatched part of re.Match object.

    That is, the searched string with the span of the whole match cut out:
    the text before the match joined to the text after it.
    """
    start, end = match.span(0)
    return match.string[:start]+match.string[end:]

[334] Fix | Delete

[335] Fix | Delete

# All four patterns below except HEADER_ESCAPE_RE are anchored at the start
# of the remaining text.
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    The function knows how to deal with ",", ";" and "=" as well as quoted
    values after "=".  A list of space separated tokens are parsed as if they
    were separated by ";".

    If the header_values passed as argument contains multiple values, then they
    are treated as if they were a single value separated by comma ",".

    This means that this function is useful for parsing header fields that
    follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
    the requirement for tokens).

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Each header is represented by a list of key/value pairs.  The value for a
    simple token (not part of a parameter) is None.  Syntactically incorrect
    headers will not necessarily be parsed as you would want.

    This is easier to describe with some examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            # Every pattern used here is anchored with "^", so a match always
            # starts at index 0 and the unconsumed rest is text[m.end():].
            m = HEADER_TOKEN_RE.search(text)
            if m:
                text = text[m.end():]
                name = m.group(1)
                m = HEADER_QUOTED_VALUE_RE.search(text)
                if m:  # quoted value
                    text = text[m.end():]
                    # drop the backslash of each quoted-pair
                    value = HEADER_ESCAPE_RE.sub(r"\1", m.group(1))
                else:
                    m = HEADER_VALUE_RE.search(text)
                    if m:  # unquoted value
                        text = text[m.end():]
                        value = m.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
            elif text.lstrip().startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = text.lstrip()[1:]
                if pairs: result.append(pairs)
                pairs = []
            else:
                # skip junk
                non_junk = re.sub(r"^[=\s;]*", "", text)
                # Guard against looping forever: something must have been
                # removed.  (The substitution count cannot be used for this;
                # the pattern can match empty, so the count is always 1.)
                assert len(non_junk) < len(text), (
                    "split_header_words bug: '%s', '%s', %s" %
                    (orig_text, text, pairs))
                text = non_junk
        if pairs: result.append(pairs)
    return result

[423] Fix | Delete

[424] Fix | Delete

# Characters that need a backslash inside a quoted value.
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    Takes a list of lists of (key, value) pairs and produces a single header
    value.  Attribute values are quoted if needed.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for k, v in pairs:
            if v is not None:
                # fullmatch, not "^\w+$": "$" also matches just before a
                # trailing newline, which would let such a value through
                # unquoted.
                if not re.fullmatch(r"\w+", v):
                    v = HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", v)  # escape " and \
                    v = '"%s"' % v
                k = "%s=%s" % (k, v)
            attr.append(k)
        if attr: headers.append("; ".join(attr))
    return ", ".join(headers)

[449] Fix | Delete

[450] Fix | Delete

def strip_quotes(text):
    """Return *text* without a leading and/or trailing double quote.

    Each end is handled independently: a quote is removed from the start if
    present, then one from the end of what remains if present.  At most one
    character is removed from each end.
    """
    if text.startswith('"'):
        text = text[1:]
    if text.endswith('"'):
        text = text[:-1]
    return text

[456] Fix | Delete

[457] Fix | Delete

def parse_ns_headers(ns_headers):

[458] Fix | Delete

"""Ad-hoc parser for Netscape protocol cookie-attributes.

[459] Fix | Delete

[460] Fix | Delete

The old Netscape cookie format for Set-Cookie can for instance contain

[461] Fix | Delete

an unquoted "," in the expires field, so we have to use this ad-hoc

[462] Fix | Delete

parser instead of split_header_words.

[463] Fix | Delete

[464] Fix | Delete

XXX This may not make the best possible effort to parse all the crap

[465] Fix | Delete

that Netscape Cookie headers contain. Ronald Tschalar's HTTPClient

[466] Fix | Delete

parser is probably better, so could do worse than following that if

[467] Fix | Delete

this ever gives any trouble.

[468] Fix | Delete

[469] Fix | Delete

Currently, this is also used for parsing RFC 2109 cookies.

[470] Fix | Delete

[471] Fix | Delete

"""

[472] Fix | Delete

known_attrs = ("expires", "domain", "path", "secure",

[473] Fix | Delete

# RFC 2109 attrs (may turn up in Netscape cookies, too)

[474] Fix | Delete

"version", "port", "max-age")

[475] Fix | Delete

[476] Fix | Delete

result = []

[477] Fix | Delete

for ns_header in ns_headers:

[478] Fix | Delete

pairs = []

[479] Fix | Delete

version_set = False

[480] Fix | Delete

[481] Fix | Delete

# XXX: The following does not strictly adhere to RFCs in that empty

[482] Fix | Delete

# names and values are legal (the former will only appear once and will

[483] Fix | Delete

# be overwritten if multiple occurrences are present). This is

[484] Fix | Delete

# mostly to deal with backwards compatibility.

[485] Fix | Delete

for ii, param in enumerate(ns_header.split(';')):

[486] Fix | Delete

param = param.strip()

[487] Fix | Delete

[488] Fix | Delete

key, sep, val = param.partition('=')

[489] Fix | Delete

key = key.strip()

[490] Fix | Delete

[491] Fix | Delete

if not key:

[492] Fix | Delete

if ii == 0:

[493] Fix | Delete

break

[494] Fix | Delete

else:

[495] Fix | Delete

continue

[496] Fix | Delete

[497] Fix | Delete

# allow for a distinction between present and empty and missing

[498] Fix | Delete

# altogether

[499] Fix | Delete

12 3 4 5