# Edit File by line
# /home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../lib64/python2..../email
# File: header.py
# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

"""Header encoding and decoding functionality."""

# Public names exported by ``from email.header import *``.
__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

import re
import binascii

import email.quoprimime
import email.base64mime

from email.errors import HeaderParseError
from email.charset import Charset

# String constants shared by the header folding/encoding code below.
NL = '\n'
SPACE = ' '
USPACE = u' '
# A hard tab in continuation whitespace is counted as eight columns.
SPACE8 = ' ' * 8
UEMPTYSTRING = u''

# Default maximum length of an encoded header line.
MAXLINELEN = 76

# Charset instances used as the default charset and as encoding fallbacks.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from exclamation mark (\041) to
# tilde (\176).  For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embeded_header = re.compile(r'\n[^ \t]+:')


# Helpers
# Re-use the quoted-printable module's line-filling helper; it is called from
# Header._encode_chunks() to append encoded chunks to the output lines.
_max_append = email.quoprimime._max_append


def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (decoded_string, charset) pairs containing each of the
    decoded parts of the header.  Charset is None for non-encoded parts of the
    header, otherwise a lower-case string containing the name of the character
    set specified in the encoded string.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If no encoding, just return the header
    header = str(header)
    if not ecre.search(header):
        return [(header, None)]
    decoded = []
    for line in header.splitlines():
        # This line might not have an encoding in it
        if not ecre.search(line):
            decoded.append((line, None))
            continue
        # ecre has three groups, so split() yields a flat list of the form
        # [unencoded, charset, encoding, encoded, unencoded, ...].
        parts = ecre.split(line)
        while parts:
            unenc = parts.pop(0).strip()
            if unenc:
                # Should we continue a long line?
                if decoded and decoded[-1][1] is None:
                    decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
                else:
                    decoded.append((unenc, None))
            if parts:
                charset, encoding = [s.lower() for s in parts[0:2]]
                encoded = parts[2]
                dec = None
                if encoding == 'q':
                    dec = email.quoprimime.header_decode(encoded)
                elif encoding == 'b':
                    # Postel's law: add missing padding
                    paderr = len(encoded) % 4
                    if paderr:
                        encoded += '==='[:4 - paderr]
                    try:
                        dec = email.base64mime.decode(encoded)
                    except binascii.Error:
                        # Turn this into a higher level exception.  BAW: Right
                        # now we throw the lower level exception away but
                        # when/if we get exception chaining, we'll preserve it.
                        raise HeaderParseError
                if dec is None:
                    # Nothing was decoded; fall back to the raw encoded text.
                    dec = encoded

                # Merge adjacent parts that share the same charset.
                if decoded and decoded[-1][1] == charset:
                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
                else:
                    decoded.append((dec, charset))
            # Drop the (charset, encoding, encoded) triple just consumed.
            del parts[0:3]
    return decoded


def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequence of pairs and returns a Header
    instance.  Optional maxlinelen, header_name, and continuation_ws are as in
    the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h


class Header:
    """A header value built up from a list of (string, charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value.  If None, the initial header
        value is not set.  You can later append to the header with .append()
        method calls.  s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method.  It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument.  If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicit via maxlinelen.  For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name.  The default maxlinelen is 76.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        cws_expanded_len = len(continuation_ws.replace('\t', SPACE8))
        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        if header_name is None:
            # We don't know anything about the field header so the first line
            # is the same length as subsequent lines.
            self._firstlinelen = maxlinelen
        else:
            # The first line should be shorter to take into account the field
            # header.  Also subtract off 2 extra for the colon and space.
            self._firstlinelen = maxlinelen - len(header_name) - 2
        # Second and subsequent lines should subtract off the length in
        # columns of the continuation whitespace prefix.
        self._maxlinelen = maxlinelen - cws_expanded_len

    def __str__(self):
        """A synonym for self.encode()."""
        return self.encode()

    def __unicode__(self):
        """Helper for the built-in unicode function."""
        uchunks = []
        lastcs = None
        for s, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset.  Only do this for the second and subsequent chunks.
            nextcs = charset
            if uchunks:
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii'):
                        uchunks.append(USPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii'):
                    uchunks.append(USPACE)
            lastcs = nextcs
            uchunks.append(unicode(s, str(charset)))
        return UEMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only.  BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    def __eq__(self, other):
        # other may be a Header or a string.  Both are fine so coerce
        # ourselves to a string, swap the args and do another comparison.
        return other == self.encode()

    def __ne__(self, other):
        return not self == other

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance).  A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string.  If it is a byte string
        (i.e. isinstance(s, str) is true), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset.  If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string.  In this case, when producing an RFC 2822 compliant header
        using RFC 2047 rules, the Unicode string will be encoded using the
        following charsets in order: us-ascii, the charset hint, utf-8.  The
        first character set not to provoke a UnicodeError is used.

        Optional `errors' is passed as the third argument to any unicode() or
        ustr.encode() call.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        # If the charset is our faux 8bit charset, leave the string unchanged
        if charset != '8bit':
            # We need to test that the string can be converted to unicode and
            # back to a byte string, given the input and output codecs of the
            # charset.
            if isinstance(s, str):
                # Possibly raise UnicodeError if the byte string can't be
                # converted to a unicode with the input codec of the charset.
                incodec = charset.input_codec or 'us-ascii'
                ustr = unicode(s, incodec, errors)
                # Now make sure that the unicode could be converted back to a
                # byte string with the output codec, which may be different
                # than the input codec.  Still, use the original byte string.
                outcodec = charset.output_codec or 'us-ascii'
                ustr.encode(outcodec, errors)
            elif isinstance(s, unicode):
                # Now we have to be sure the unicode string can be converted
                # to a byte string with a reasonable output codec.  We want to
                # use the byte string in the chunk.
                for charset in USASCII, charset, UTF8:
                    try:
                        outcodec = charset.output_codec or 'us-ascii'
                        s = s.encode(outcodec, errors)
                        break
                    except UnicodeError:
                        pass
                else:
                    assert False, 'utf-8 conversion failed'
        self._chunks.append((s, charset))

    def _split(self, s, charset, maxlinelen, splitchars):
        # Split up a header safely for use with encode_chunks.
        splittable = charset.to_splittable(s)
        encoded = charset.from_splittable(splittable, True)
        elen = charset.encoded_header_len(encoded)
        # If the line's encoded length fits, just return it
        if elen <= maxlinelen:
            return [(encoded, charset)]
        # If we have undetermined raw 8bit characters sitting in a byte
        # string, we really don't know what the right thing to do is.  We
        # can't really split it because it might be multibyte data which we
        # could break if we split it between pairs.  The least harm seems to
        # be to not split the header at all, but that means they could go out
        # longer than maxlinelen.
        if charset == '8bit':
            return [(s, charset)]
        # BAW: I'm not sure what the right test here is.  What we're trying to
        # do is be faithful to RFC 2822's recommendation that ($2.2.3):
        #
        # "Note: Though structured field bodies are defined in such a way that
        #  folding can take place between many of the lexical tokens (and even
        #  within some of the lexical tokens), folding SHOULD be limited to
        #  placing the CRLF at higher-level syntactic breaks."
        #
        # For now, I can only imagine doing this when the charset is us-ascii,
        # although it's possible that other charsets may also benefit from the
        # higher-level syntactic breaks.
        elif charset == 'us-ascii':
            return self._split_ascii(s, charset, maxlinelen, splitchars)
        # BAW: should we use encoded?
        elif elen == len(s):
            # We can split on _maxlinelen boundaries because we know that the
            # encoding won't change the size of the string
            splitpnt = maxlinelen
            first = charset.from_splittable(splittable[:splitpnt], False)
            last = charset.from_splittable(splittable[splitpnt:], False)
        else:
            # Binary search for split point
            first, last = _binsplit(splittable, charset, maxlinelen)
        # first is of the proper length so just wrap it in the appropriate
        # chrome.  last must be recursively split.
        fsplittable = charset.to_splittable(first)
        fencoded = charset.from_splittable(fsplittable, True)
        chunk = [(fencoded, charset)]
        return chunk + self._split(last, charset, self._maxlinelen, splitchars)

    def _split_ascii(self, s, charset, firstlen, splitchars):
        # Delegate to the module-level _split_ascii() and pair every
        # resulting line with the charset.
        chunks = _split_ascii(s, firstlen, self._maxlinelen,
                              self._continuation_ws, splitchars)
        return zip(chunks, [charset]*len(chunks))

    def _encode_chunks(self, newchunks, maxlinelen):
        # MIME-encode a header with many different charsets and/or encodings.
        #
        # Given a list of pairs (string, charset), return a MIME-encoded
        # string suitable for use in a header field.  Each pair may have
        # different charsets and/or encodings, and the resulting header will
        # accurately reflect each setting.
        #
        # Each encoding can be email.utils.QP (quoted-printable, for
        # ASCII-like character sets like iso-8859-1), email.utils.BASE64
        # (Base64, for non-ASCII like character sets like KOI8-R and
        # iso-2022-jp), or None (no encoding).
        #
        # Each pair will be represented on a separate line; the resulting
        # string will be in the format:
        #
        # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
        #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
        chunks = []
        for header, charset in newchunks:
            if not header:
                continue
            if charset is None or charset.header_encoding is None:
                s = header
            else:
                s = charset.header_encode(header)
            # Don't add more folding whitespace than necessary
            if chunks and chunks[-1].endswith(' '):
                extra = ''
            else:
                extra = ' '
            _max_append(chunks, s, maxlinelen, extra)
        joiner = NL + self._continuation_ws
        return joiner.join(chunks)

    def encode(self, splitchars=';, '):
        """Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header.  Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings.  In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        This method will do its best to convert the string to the correct
        character set used in email, and encode and line wrap it safely with
        the appropriate scheme for that character set.

        If the given charset is not known or an error occurs during
        conversion, this function will return the header untouched.

        Optional splitchars is a string containing characters to split long
        ASCII lines on, in rough support of RFC 2822's `highest level
        syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        newchunks = []
        maxlinelen = self._firstlinelen
        lastlen = 0
        for s, charset in self._chunks:
            # The first bit of the next chunk should be just long enough to
            # fill the next line.  Don't forget the space separating the
            # encoded words.
            targetlen = maxlinelen - lastlen - 1
            if targetlen < charset.encoded_header_len(''):
                # Stick it on the next line
                targetlen = maxlinelen
            newchunks += self._split(s, charset, targetlen, splitchars)
            lastchunk, lastcharset = newchunks[-1]
            lastlen = lastcharset.encoded_header_len(lastchunk)
        value = self._encode_chunks(newchunks, maxlinelen)
        # Refuse to emit a value that would inject an extra header line.
        if _embeded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value


def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
    """Split the ASCII string s into a list of lines.

    firstlen is the length limit applied to the first output line and
    restlen the limit for every following line.  continuation_ws is only
    used to account for the width of the prefix that will later be put in
    front of continuation lines (a tab counts as eight columns).  splitchars
    is tried character by character, in order; the first one present in a
    too-long line is used as the break character.

    Lines that contain none of the split characters are returned unchanged,
    even if they are longer than the limit.
    """
    lines = []
    maxlen = firstlen
    for line in s.splitlines():
        # Ignore any leading whitespace (i.e. continuation whitespace) already
        # on the line, since we'll be adding our own.
        line = line.lstrip()
        if len(line) < maxlen:
            lines.append(line)
            maxlen = restlen
            continue
        # Attempt to split the line at the highest-level syntactic break
        # possible.  Note that we don't have a lot of smarts about field
        # syntax; we just try to break on semi-colons, then commas, then
        # whitespace.
        for ch in splitchars:
            if ch in line:
                break
        else:
            # There's nothing useful to split the line on, not even spaces, so
            # just append this line unchanged
            lines.append(line)
            maxlen = restlen
            continue
        # Now split the line on the character plus trailing whitespace.  The
        # character is escaped so that a caller-supplied split character
        # which is a regex metacharacter (e.g. '.' or '|') is matched
        # literally instead of being interpreted as pattern syntax.
        cre = re.compile(r'%s\s*' % re.escape(ch))
        if ch in ';,':
            eol = ch
        else:
            eol = ''
        joiner = eol + ' '
        joinlen = len(joiner)
        wslen = len(continuation_ws.replace('\t', SPACE8))
        this = []
        linelen = 0
        for part in cre.split(line):
            curlen = linelen + max(0, len(this)-1) * joinlen
            partlen = len(part)
            onfirstline = not lines
            # We don't want to split after the field name, if we're on the
            # first line and the field name is present in the header string.
            if ch == ' ' and onfirstline and \
                   len(this) == 1 and fcre.match(this[0]):
                this.append(part)
                linelen += partlen
            elif curlen + partlen > maxlen:
                if this:
                    lines.append(joiner.join(this) + eol)
                # If this part is longer than maxlen and we aren't already
                # splitting on whitespace, try to recursively split this line
                # on whitespace.
                if partlen > maxlen and ch != ' ':
                    subl = _split_ascii(part, maxlen, restlen,
                                        continuation_ws, ' ')
                    lines.extend(subl[:-1])
                    this = [subl[-1]]
                else:
                    this = [part]
                linelen = wslen + len(this[-1])
                maxlen = restlen
            else:
                this.append(part)
                linelen += partlen
        # Put any left over parts on a line by themselves
        if this:
            lines.append(joiner.join(this))
    return lines


def _binsplit(splittable, charset, maxlinelen):
[487] Fix | Delete
i = 0
[488] Fix | Delete
j = len(splittable)
[489] Fix | Delete
while i < j:
[490] Fix | Delete
# Invariants:
[491] Fix | Delete
# 1. splittable[:k] fits for all k <= i (note that we *assume*,
[492] Fix | Delete
# at the start, that splittable[:0] fits).
[493] Fix | Delete
# 2. splittable[:k] does not fit for any k > j (at the start,
[494] Fix | Delete
# this means we shouldn't look at any k > len(splittable)).
[495] Fix | Delete
# 3. We don't know about splittable[:k] for k in i+1..j.
[496] Fix | Delete
# 4. We want to set i to the largest k that fits, with i <= k <= j.
[497] Fix | Delete
#
[498] Fix | Delete
m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
[499] Fix | Delete
12
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function