Edit File by line

[0] Fix | Delete

# Contact: email-sig@python.org

[1] Fix | Delete

[2] Fix | Delete

"""Classes to generate plain text from a message object tree."""

[3] Fix | Delete

[4] Fix | Delete

__all__ = ['Generator', 'DecodedGenerator']

[5] Fix | Delete

[6] Fix | Delete

import re

[7] Fix | Delete

import sys

[8] Fix | Delete

import time

[9] Fix | Delete

import random

[10] Fix | Delete

import warnings

[11] Fix | Delete

[12] Fix | Delete

from cStringIO import StringIO

[13] Fix | Delete

from email.header import Header

[14] Fix | Delete

[15] Fix | Delete

# Separators used when building handler-method names and joining text.
UNDERSCORE = '_'
NL = '\n'

# Matches a Unix mbox "From " delimiter at the start of any line; used to
# escape such lines as ">From " when From_ mangling is enabled.
fcre = re.compile(r'^From ', re.MULTILINE)

[19] Fix | Delete

[20] Fix | Delete

def _is8bitstring(s):

[21] Fix | Delete

if isinstance(s, str):

[22] Fix | Delete

try:

[23] Fix | Delete

unicode(s, 'us-ascii')

[24] Fix | Delete

except UnicodeError:

[25] Fix | Delete

return True

[26] Fix | Delete

return False

[27] Fix | Delete

[28] Fix | Delete

[29] Fix | Delete

[30] Fix | Delete

class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                # No stored delimiter: synthesize one stamped with "now".
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg (headers followed by body) to the output file.

        The body is generated first, into a temporary buffer, and the headers
        are written afterwards; see the comments below for why.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by a blank separator line."""
        for h, v in msg.items():
            # The trailing comma keeps the value on the same output line.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write the string payload of msg, escaping From_ lines if enabled.

        A payload of None writes nothing.  Raises TypeError when the payload
        is neither None nor a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue.

        If msg has no boundary yet, one is generated that does not occur in
        the flattened subparts and is stored on msg via set_boundary().
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--' + NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write each subpart's flattened text, separated by empty lines."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part.

        A list payload has its first element flattened with a cloned
        generator; any other payload is written as-is.
        """
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)

[289] Fix | Delete

[290] Fix | Delete

[291] Fix | Delete

[292] Fix | Delete

# Default placeholder written in place of each non-text part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def _dispatch(self, msg):
        """Write decoded text parts and a placeholder for other leaf parts.

        Every part yielded by msg.walk() is visited; multipart containers
        themselves produce no output.
        """
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # Printing to self goes through self.write().
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type': part.get_content_type(),
                    'maintype': part.get_content_maintype(),
                    'subtype': part.get_content_subtype(),
                    'filename': part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding': part.get('Content-Transfer-Encoding',
                                         '[no encoding]'),
                    }

[347] Fix | Delete

[348] Fix | Delete

[349] Fix | Delete

[350] Fix | Delete

# Helper

[351] Fix | Delete

_width = len(repr(sys.maxint-1))

[352] Fix | Delete

_fmt = '%%0%dd' % _width

[353] Fix | Delete

[354] Fix | Delete

def _make_boundary(text=None):

[355] Fix | Delete

# Craft a random boundary. If text is given, ensure that the chosen

[356] Fix | Delete

# boundary doesn't appear in the text.

[357] Fix | Delete

token = random.randrange(sys.maxint)

[358] Fix | Delete

boundary = ('=' * 15) + (_fmt % token) + '=='

[359] Fix | Delete

if text is None:

[360] Fix | Delete

return boundary

[361] Fix | Delete

b = boundary

[362] Fix | Delete

counter = 0

[363] Fix | Delete

while True:

[364] Fix | Delete

cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)

[365] Fix | Delete

if not cre.search(text):

[366] Fix | Delete

break

[367] Fix | Delete

b = boundary + '.' + str(counter)

[368] Fix | Delete

counter += 1

[369] Fix | Delete

return b

[370] Fix | Delete

[371] Fix | Delete