Edit File by line

[0] Fix | Delete

# Author: Ben Gertzfield, Barry Warsaw

[1] Fix | Delete

# Contact: email-sig@python.org

[2] Fix | Delete

[3] Fix | Delete

"""Header encoding and decoding functionality."""

[4] Fix | Delete

[5] Fix | Delete

# Public API of this module: the names exported by ``from ... import *``.
__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

[10] Fix | Delete

[11] Fix | Delete

import re

[12] Fix | Delete

import binascii

[13] Fix | Delete

[14] Fix | Delete

import email.quoprimime

[15] Fix | Delete

import email.base64mime

[16] Fix | Delete

[17] Fix | Delete

from email.errors import HeaderParseError

[18] Fix | Delete

from email import charset as _charset

[19] Fix | Delete

# Local alias so the rest of the module can refer to the class directly.
Charset = _charset.Charset

# Small string/bytes constants shared by the functions and classes below.
NL = '\n'
SPACE = ' '
BSPACE = b' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
# Default maximum line length used by Header when none is given.
MAXLINELEN = 78
# Characters treated as folding white space when splitting header lines.
FWS = ' \t'

# Shared Charset instances: the default charset, and the fallback used by
# Header.append() when a string cannot be encoded as us-ascii.
USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

[31] Fix | Delete

[32] Fix | Delete

# Match encoded-word strings in the form =?charset?q?Hello_World?=
# The pattern has exactly three groups (charset, encoding, encoded), so
# ecre.split() yields plain text followed by repeating groups of
# (charset, encoding, encoded, plain text).
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

[42] Fix | Delete

[43] Fix | Delete

# Field name regexp, including trailing colon, but not separating whitespace,

[44] Fix | Delete

# according to RFC 2822. Character range is from exclamation mark to tilde.

[45] Fix | Delete

# For use with .match()

[46] Fix | Delete

# Matches a run of printable ASCII characters (octal 041 '!' through 176 '~')
# followed by a colon at the end of the string.
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value. Used to check for
# header injection attack.
_embedded_header = re.compile(r'\n[^ \t]+:')


# Helpers
_max_append = email.quoprimime._max_append

[56] Fix | Delete

[57] Fix | Delete

[58] Fix | Delete

[59] Fix | Delete

def decode_header(header):
    """Decode a message header value without converting charset.

    Returns a list of (string, charset) pairs containing each of the decoded
    parts of the header. Charset is None for non-encoded parts of the header,
    otherwise a lower-case string containing the name of the character set
    specified in the encoded string.

    header may be a string that may or may not contain RFC2047 encoded words,
    or it may be a Header object.

    When the header string contains no encoded words at all, the single pair
    (header, None) is returned with the string left untouched; otherwise every
    decoded part is returned as bytes.

    An email.errors.HeaderParseError may be raised when certain decoding error
    occurs (e.g. a base64 decoding exception).
    """
    # If it is a Header object, we can just return the encoded chunks.
    if hasattr(header, '_chunks'):
        return [(_charset._encode(string, str(charset)), str(charset))
                for string, charset in header._chunks]
    # If no encoding, just return the header with no charset.
    if not ecre.search(header):
        return [(header, None)]
    # First step is to parse all the encoded parts into triplets of the form
    # (encoded_string, encoding, charset). For unencoded strings, the last
    # two parts will be None.
    words = []
    for line in header.splitlines():
        # ecre.split() yields: text, charset, encoding, encoded, text, ...
        parts = ecre.split(line)
        first = True
        while parts:
            unencoded = parts.pop(0)
            if first:
                # Leading whitespace on each physical line is folding
                # whitespace, not content.
                unencoded = unencoded.lstrip()
                first = False
            if unencoded:
                words.append((unencoded, None, None))
            if parts:
                charset = parts.pop(0).lower()
                encoding = parts.pop(0).lower()
                encoded = parts.pop(0)
                words.append((encoded, encoding, charset))
    # Now loop over words and remove words that consist of whitespace
    # between two encoded strings.
    droplist = []
    for n, w in enumerate(words):
        if n > 1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
            droplist.append(n-1)
    # Delete from the end so earlier indices stay valid.
    for d in reversed(droplist):
        del words[d]

    # The next step is to decode each encoded word by applying the reverse
    # base64 or quopri transformation. decoded_words is now a list of the
    # form (decoded_word, charset).
    decoded_words = []
    for encoded_string, encoding, charset in words:
        if encoding is None:
            # This is an unencoded word.
            decoded_words.append((encoded_string, charset))
        elif encoding == 'q':
            word = email.quoprimime.header_decode(encoded_string)
            decoded_words.append((word, charset))
        elif encoding == 'b':
            paderr = len(encoded_string) % 4   # Postel's law: add missing padding
            if paderr:
                encoded_string += '==='[:4 - paderr]
            try:
                word = email.base64mime.decode(encoded_string)
            except binascii.Error:
                raise HeaderParseError('Base64 decoding error')
            else:
                decoded_words.append((word, charset))
        else:
            raise AssertionError('Unexpected encoding: ' + encoding)
    # Now convert all words to bytes and collapse consecutive runs of
    # similarly encoded words.
    collapsed = []
    last_word = last_charset = None
    for word, charset in decoded_words:
        if isinstance(word, str):
            word = bytes(word, 'raw-unicode-escape')
        if last_word is None:
            last_word = word
            last_charset = charset
        elif charset != last_charset:
            collapsed.append((last_word, last_charset))
            last_word = word
            last_charset = charset
        elif last_charset is None:
            # Adjacent unencoded words are re-joined with a single space.
            last_word += BSPACE + word
        else:
            last_word += word
    collapsed.append((last_word, last_charset))
    return collapsed

[151] Fix | Delete

[152] Fix | Delete

[153] Fix | Delete

[154] Fix | Delete

def make_header(decoded_seq, maxlinelen=None, header_name=None,
                continuation_ws=' '):
    """Create a Header from a sequence of pairs as returned by decode_header()

    decode_header() takes a header value string and returns a sequence of
    pairs of the format (decoded_string, charset) where charset is the string
    name of the character set.

    This function takes one of those sequence of pairs and returns a Header
    instance. Optional maxlinelen, header_name, and continuation_ws are as in
    the Header constructor.
    """
    h = Header(maxlinelen=maxlinelen, header_name=header_name,
               continuation_ws=continuation_ws)
    for s, charset in decoded_seq:
        # None means us-ascii but we can simply pass it on to h.append()
        if charset is not None and not isinstance(charset, Charset):
            charset = Charset(charset)
        h.append(s, charset)
    return h

[174] Fix | Delete

[175] Fix | Delete

[176] Fix | Delete

[177] Fix | Delete

class Header:
    """A header value stored as a list of (string, Charset) chunks."""

    def __init__(self, s=None, charset=None,
                 maxlinelen=None, header_name=None,
                 continuation_ws=' ', errors='strict'):
        """Create a MIME-compliant header that can contain many character sets.

        Optional s is the initial header value. If None, the initial header
        value is not set. You can later append to the header with .append()
        method calls. s may be a byte string or a Unicode string, but see the
        .append() documentation for semantics.

        Optional charset serves two purposes: it has the same meaning as the
        charset argument to the .append() method. It also sets the default
        character set for all subsequent .append() calls that omit the charset
        argument. If charset is not provided in the constructor, the us-ascii
        charset is used both as s's initial charset and as the default for
        subsequent .append() calls.

        The maximum line length can be specified explicitly via maxlinelen. For
        splitting the first line to a shorter value (to account for the field
        header which isn't included in s, e.g. `Subject') pass in the name of
        the field in header_name. The default maxlinelen is 78 as recommended
        by RFC 2822.

        continuation_ws must be RFC 2822 compliant folding whitespace (usually
        either a space or a hard tab) which will be prepended to continuation
        lines.

        errors is passed through to the .append() call.
        """
        if charset is None:
            charset = USASCII
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        self._charset = charset
        self._continuation_ws = continuation_ws
        self._chunks = []
        if s is not None:
            self.append(s, charset, errors)
        if maxlinelen is None:
            maxlinelen = MAXLINELEN
        self._maxlinelen = maxlinelen
        if header_name is None:
            self._headerlen = 0
        else:
            # Take the separating colon and space into account.
            self._headerlen = len(header_name) + 2

    def __str__(self):
        """Return the string value of the header."""
        self._normalize()
        uchunks = []
        lastcs = None
        lastspace = None
        for string, charset in self._chunks:
            # We must preserve spaces between encoded and non-encoded word
            # boundaries, which means for us we need to add a space when we go
            # from a charset to None/us-ascii, or from None/us-ascii to a
            # charset. Only do this for the second and subsequent chunks.
            # Don't add a space if the None/us-ascii string already has
            # a space (trailing or leading depending on transition)
            nextcs = charset
            if nextcs == _charset.UNKNOWN8BIT:
                # Round-trip through bytes so surrogate-escaped characters
                # are rendered as replacement characters.
                original_bytes = string.encode('ascii', 'surrogateescape')
                string = original_bytes.decode('ascii', 'replace')
            if uchunks:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if nextcs in (None, 'us-ascii') and not hasspace:
                        uchunks.append(SPACE)
                        nextcs = None
                elif nextcs not in (None, 'us-ascii') and not lastspace:
                    uchunks.append(SPACE)
            lastspace = string and self._nonctext(string[-1])
            lastcs = nextcs
            uchunks.append(string)
        return EMPTYSTRING.join(uchunks)

    # Rich comparison operators for equality only. BAW: does it make sense to
    # have or explicitly disable <, <=, >, >= operators?
    # NOTE: defining __eq__ without __hash__ leaves instances unhashable.
    def __eq__(self, other):
        # other may be a Header or a string. Both are fine so coerce
        # ourselves to a unicode (of the unencoded header value), swap the
        # args and do another comparison.
        return other == str(self)

    def append(self, s, charset=None, errors='strict'):
        """Append a string to the MIME header.

        Optional charset, if given, should be a Charset instance or the name
        of a character set (which will be converted to a Charset instance). A
        value of None (the default) means that the charset given in the
        constructor is used.

        s may be a byte string or a Unicode string. If it is a byte string
        (i.e. isinstance(s, str) is false), then charset is the encoding of
        that byte string, and a UnicodeError will be raised if the string
        cannot be decoded with that charset. If s is a Unicode string, then
        charset is a hint specifying the character set of the characters in
        the string. In either case, when producing an RFC 2822 compliant
        header using RFC 2047 rules, the string will be encoded using the
        output codec of the charset. If the string cannot be encoded to the
        output codec, a UnicodeError will be raised.

        Optional `errors' is passed as the errors argument to the decode
        call if s is a byte string.
        """
        if charset is None:
            charset = self._charset
        elif not isinstance(charset, Charset):
            charset = Charset(charset)
        if not isinstance(s, str):
            input_charset = charset.input_codec or 'us-ascii'
            if input_charset == _charset.UNKNOWN8BIT:
                s = s.decode('us-ascii', 'surrogateescape')
            else:
                s = s.decode(input_charset, errors)
        # Ensure that the bytes we're storing can be decoded to the output
        # character set, otherwise an early error is raised.
        output_charset = charset.output_codec or 'us-ascii'
        if output_charset != _charset.UNKNOWN8BIT:
            try:
                s.encode(output_charset, errors)
            except UnicodeEncodeError:
                if output_charset != 'us-ascii':
                    raise
                # A us-ascii chunk with non-ASCII characters is stored as
                # utf-8 instead of failing.
                charset = UTF8
        self._chunks.append((s, charset))

    def _nonctext(self, s):
        """True if string s is not a ctext character of RFC822.
        """
        return s.isspace() or s in ('(', ')', '\\')

    def encode(self, splitchars=';, \t', maxlinelen=None, linesep='\n'):
        r"""Encode a message header into an RFC-compliant format.

        There are many issues involved in converting a given string for use in
        an email header. Only certain character sets are readable in most
        email clients, and as header strings can only contain a subset of
        7-bit ASCII, care must be taken to properly convert and encode (with
        Base64 or quoted-printable) header strings. In addition, there is a
        75-character length limit on any given encoded header field, so
        line-wrapping must be performed, even with double-byte character sets.

        Optional maxlinelen specifies the maximum length of each generated
        line, exclusive of the linesep string. Individual lines may be longer
        than maxlinelen if a folding point cannot be found. The first line
        will be shorter by the length of the header name plus ": " if a header
        name was specified at Header construction time. The default value for
        maxlinelen is determined at header construction time.

        Optional splitchars is a string containing characters which should be
        given extra weight by the splitting algorithm during normal header
        wrapping. This is in very rough support of RFC 2822's `higher level
        syntactic breaks': split points preceded by a splitchar are preferred
        during line splitting, with the characters preferred in the order in
        which they appear in the string. Space and tab may be included in the
        string to indicate whether preference should be given to one over the
        other as a split point when other split chars do not appear in the line
        being split. Splitchars does not affect RFC 2047 encoded lines.

        Optional linesep is a string to be used to separate the lines of
        the value. The default value is the most useful for typical
        Python applications, but it can be set to \r\n to produce RFC-compliant
        line separators when needed.

        Raises HeaderParseError if the encoded value appears to contain an
        embedded header.
        """
        self._normalize()
        if maxlinelen is None:
            maxlinelen = self._maxlinelen
        # A maxlinelen of 0 means don't wrap. For all practical purposes,
        # choosing a huge number here accomplishes that and makes the
        # _ValueFormatter algorithm much simpler.
        if maxlinelen == 0:
            maxlinelen = 1000000
        formatter = _ValueFormatter(self._headerlen, maxlinelen,
                                    self._continuation_ws, splitchars)
        lastcs = None
        hasspace = lastspace = None
        for string, charset in self._chunks:
            # hasspace is None only for the first chunk, which never needs a
            # transition in front of it.
            if hasspace is not None:
                hasspace = string and self._nonctext(string[0])
                if lastcs not in (None, 'us-ascii'):
                    if not hasspace or charset not in (None, 'us-ascii'):
                        formatter.add_transition()
                elif charset not in (None, 'us-ascii') and not lastspace:
                    formatter.add_transition()
            lastspace = string and self._nonctext(string[-1])
            lastcs = charset
            hasspace = False
            lines = string.splitlines()
            if lines:
                formatter.feed('', lines[0], charset)
            else:
                formatter.feed('', '', charset)
            for line in lines[1:]:
                formatter.newline()
                if charset.header_encoding is not None:
                    formatter.feed(self._continuation_ws, ' ' + line.lstrip(),
                                   charset)
                else:
                    # Keep the line's own leading whitespace as the folding
                    # whitespace for unencoded continuation lines.
                    sline = line.lstrip()
                    fws = line[:len(line)-len(sline)]
                    formatter.feed(fws, sline, charset)
            if len(lines) > 1:
                formatter.newline()
        if self._chunks:
            formatter.add_transition()
        value = formatter._str(linesep)
        if _embedded_header.search(value):
            raise HeaderParseError("header value appears to contain "
                "an embedded header: {!r}".format(value))
        return value

    def _normalize(self):
        # Step 1: Normalize the chunks so that all runs of identical charsets
        # get collapsed into a single unicode string.
        chunks = []
        last_charset = None
        last_chunk = []
        for string, charset in self._chunks:
            if charset == last_charset:
                last_chunk.append(string)
            else:
                if last_charset is not None:
                    chunks.append((SPACE.join(last_chunk), last_charset))
                last_chunk = [string]
                last_charset = charset
        if last_chunk:
            chunks.append((SPACE.join(last_chunk), last_charset))
        self._chunks = chunks

[408] Fix | Delete

[409] Fix | Delete

[410] Fix | Delete

[411] Fix | Delete

class _ValueFormatter:

[412] Fix | Delete

def __init__(self, headerlen, maxlen, continuation_ws, splitchars):
    """Set up an empty formatter.

    headerlen seeds the size of the first line's accumulator, maxlen is the
    target line length, continuation_ws is the whitespace placed in front of
    continuation lines, and splitchars is stored for use by feed() when it
    splits unencoded text.
    """
    self._maxlen = maxlen
    self._continuation_ws = continuation_ws
    # Cached because _maxlengths() needs it for every continuation line.
    self._continuation_ws_len = len(continuation_ws)
    self._splitchars = splitchars
    # Completed output lines, in order.
    self._lines = []
    # The line currently being built.
    self._current_line = _Accumulator(headerlen)

[419] Fix | Delete

[420] Fix | Delete

def _str(self, linesep):
    """Flush the current line and return all lines joined by linesep."""
    self.newline()
    return linesep.join(self._lines)

[423] Fix | Delete

[424] Fix | Delete

def __str__(self):
    """Return the formatted value with lines separated by NL."""
    return self._str(NL)

[426] Fix | Delete

[427] Fix | Delete

def newline(self):
    """Flush the current line into self._lines and reset the accumulator.

    A trailing (' ', '') transition marker is dropped rather than emitted.
    A line consisting only of whitespace is glued onto the previous output
    line; if there is no previous line yet it is emitted as a line of its
    own instead of raising IndexError.
    """
    end_of_line = self._current_line.pop()
    if end_of_line != (' ', ''):
        # Not a bare transition marker: put it back so it is emitted.
        self._current_line.push(*end_of_line)
    if len(self._current_line) > 0:
        # Only merge into the previous line when one exists; indexing
        # self._lines[-1] on an empty list would raise IndexError.
        if self._current_line.is_onlyws() and self._lines:
            self._lines[-1] += str(self._current_line)
        else:
            self._lines.append(str(self._current_line))
    self._current_line.reset()

[437] Fix | Delete

[438] Fix | Delete

def add_transition(self):
    """Push a (' ', '') marker onto the current line.

    newline() drops this marker again if it is still the last entry when
    the line is flushed.
    """
    self._current_line.push(' ', '')

[440] Fix | Delete

[441] Fix | Delete

def feed(self, fws, string, charset):
    """Add string, preceded by the whitespace fws, to the formatted output.

    Text whose charset has no header encoding is handed to _ascii_split();
    anything else is encoded with charset.header_encode_lines() and spread
    over the current and following lines.
    """
    # If the charset has no header encoding (i.e. it is an ASCII encoding)
    # then we must split the header at the "highest level syntactic break"
    # possible. Note that we don't have a lot of smarts about field
    # syntax; we just try to break on semi-colons, then commas, then
    # whitespace. Eventually, this should be pluggable.
    if charset.header_encoding is None:
        self._ascii_split(fws, string, self._splitchars)
        return
    # Otherwise, we're doing either a Base64 or a quoted-printable
    # encoding which means we don't need to split the line on syntactic
    # breaks. We can basically just find enough characters to fit on the
    # current line, minus the RFC 2047 chrome. What makes this trickier
    # though is that we have to split at octet boundaries, not character
    # boundaries but it's only safe to split at character boundaries so at
    # best we can only get close.
    encoded_lines = charset.header_encode_lines(string, self._maxlengths())
    # The first element extends the current line, but if it's None then
    # nothing more fit on the current line so start a new line.
    try:
        first_line = encoded_lines.pop(0)
    except IndexError:
        # There are no encoded lines, so we're done.
        return
    if first_line is not None:
        self._append_chunk(fws, first_line)
    try:
        last_line = encoded_lines.pop()
    except IndexError:
        # There was only one line.
        return
    self.newline()
    # The last encoded line stays open so later chunks can extend it.
    self._current_line.push(self._continuation_ws, last_line)
    # Everything else are full lines in themselves.
    for line in encoded_lines:
        self._lines.append(self._continuation_ws + line)

[477] Fix | Delete

[478] Fix | Delete

def _maxlengths(self):
    """Yield the space available on each successive output line, forever.

    The first value is what is left on the current line; every later value
    is the maximum length minus the continuation whitespace.
    """
    # The first line's length.
    yield self._maxlen - len(self._current_line)
    while True:
        yield self._maxlen - self._continuation_ws_len

[483] Fix | Delete

[484] Fix | Delete

def _ascii_split(self, fws, string, splitchars):

[485] Fix | Delete

# The RFC 2822 header folding algorithm is simple in principle but

[486] Fix | Delete

# complex in practice. Lines may be folded any place where "folding

[487] Fix | Delete

# white space" appears by inserting a linesep character in front of the

[488] Fix | Delete

# FWS. The complication is that not all spaces or tabs qualify as FWS,

[489] Fix | Delete

# and we are also supposed to prefer to break at "higher level

[490] Fix | Delete

# syntactic breaks". We can't do either of these without intimate

[491] Fix | Delete

# knowledge of the structure of structured headers, which we don't have

[492] Fix | Delete

# here. So the best we can do here is prefer to break at the specified

[493] Fix | Delete

# splitchars, and hope that we don't choose any spaces or tabs that

[494] Fix | Delete

# aren't legal FWS. (This is at least better than the old algorithm,

[495] Fix | Delete

# where we would sometimes *introduce* FWS after a splitchar, or the

[496] Fix | Delete

# algorithm before that, where we would turn all white space runs into

[497] Fix | Delete

# single spaces or tabs.)

[498] Fix | Delete

parts = re.split("(["+FWS+"]+)", fws+string)

[499] Fix | Delete