# Copyright (C) 2001-2007 Python Software Foundation
# Contact: email-sig@python.org
"""Basic message object for the email package object model."""
__all__ = ['Message', 'EmailMessage']
from io import BytesIO, StringIO
from email._policybase import Policy, compat32
from email import charset as _charset
from email._encoded_words import decode_b
Charset = _charset.Charset
# Regular expression that matches `special' characters in parameters, the
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
# Split header parameters. BAW: this may be too simple. It isn't
# strictly RFC 2045 (section 5.1) compliant, but it catches most headers
# found in the wild. We may eventually need a full fledged parser.
# RDM: we might have a Header here; for now just stringify it.
a, sep, b = str(param).partition(';')
return a.strip(), b.strip()
def _formatparam(param, value=None, quote=True):
"""Convenience function to format and return a key=value pair.
This will quote the value if needed or if quote is true. If value is a
three tuple (charset, language, value), it will be encoded according
to RFC2231 rules. If it contains non-ascii characters it will likewise
be encoded according to RFC2231 rules, using the utf-8 charset and
if value is not None and len(value) > 0:
# A tuple is used for RFC 2231 encoded parameter values where items
# are (charset, language, value). charset is a string, not a Charset
# instance. RFC 2231 encoded values are never quoted, per RFC.
if isinstance(value, tuple):
value = utils.encode_rfc2231(value[2], value[0], value[1])
return '%s=%s' % (param, value)
except UnicodeEncodeError:
value = utils.encode_rfc2231(value, 'utf-8', '')
return '%s=%s' % (param, value)
# BAW: Please check this. I think that if quote is set it should
# force quoting even if not necessary.
if quote or tspecials.search(value):
return '%s="%s"' % (param, utils.quote(value))
return '%s=%s' % (param, value)
# RDM This might be a Header, so for now stringify it.
while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
end = s.find(';', end + 1)
f = f[:i].strip().lower() + '=' + f[i+1:].strip()
def _unquotevalue(value):
# This is different than utils.collapse_rfc2231_value() because it doesn't
# try to convert the value to a unicode. Message.get_param() and
# Message.get_params() are both currently defined to return the tuple in
# the face of RFC 2231 parameters.
if isinstance(value, tuple):
return value[0], value[1], utils.unquote(value[2])
return utils.unquote(value)
A message object is defined as something that has a bunch of RFC 2822
headers and a payload. It may optionally have an envelope header
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
multipart or a message/rfc822), then the payload is a list of Message
objects, otherwise it is a string.
Message objects implement part of the `mapping' interface, which assumes
there is exactly one occurrence of the header per message. Some headers
do in fact appear multiple times (e.g. Received) and for those headers,
you must use the explicit API to set or get all the headers. Not all of
the mapping methods are implemented.
def __init__(self, policy=compat32):
# Defaults for multipart messages
self.preamble = self.epilogue = None
self._default_type = 'text/plain'
"""Return the entire formatted message as a string.
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
"""Return the entire formatted message as a string.
Optional 'unixfrom', when true, means include the Unix From_ envelope
header. For backward compatibility reasons, if maxheaderlen is
not specified it defaults to 0, so you must override it explicitly
if you want a different maxheaderlen. 'policy' is passed to the
Generator instance used to serialize the mesasge; if it is not
specified the policy associated with the message instance is used.
If the message object contains binary data that is not encoded
according to RFC standards, the non-compliant data will be replaced by
unicode "unknown character" code points.
from email.generator import Generator
policy = self.policy if policy is None else policy
maxheaderlen=maxheaderlen,
g.flatten(self, unixfrom=unixfrom)
"""Return the entire formatted message as a bytes object.
def as_bytes(self, unixfrom=False, policy=None):
"""Return the entire formatted message as a bytes object.
Optional 'unixfrom', when true, means include the Unix From_ envelope
header. 'policy' is passed to the BytesGenerator instance used to
serialize the message; if not specified the policy associated with
the message instance is used.
from email.generator import BytesGenerator
policy = self.policy if policy is None else policy
g = BytesGenerator(fp, mangle_from_=False, policy=policy)
g.flatten(self, unixfrom=unixfrom)
"""Return True if the message consists of multiple parts."""
return isinstance(self._payload, list)
def set_unixfrom(self, unixfrom):
self._unixfrom = unixfrom
def attach(self, payload):
"""Add the given payload to the current payload.
The current payload will always be a list of objects after this method
is called. If you want to set the payload to a scalar object, use
if self._payload is None:
self._payload = [payload]
self._payload.append(payload)
raise TypeError("Attach is not valid on a message with a"
" non-multipart payload")
def get_payload(self, i=None, decode=False):
"""Return a reference to the payload.
The payload will either be a list object or a string. If you mutate
the list object, you modify the message's payload in place. Optional
i returns that index into the payload.
Optional decode is a flag indicating whether the payload should be
decoded or not, according to the Content-Transfer-Encoding header
When True and the message is not a multipart, the payload will be
decoded if this header's value is `quoted-printable' or `base64'. If
some other encoding is used, or the header is missing, or if the
payload has bogus data (i.e. bogus base64 or uuencoded data), the
payload is returned as-is.
If the message is a multipart and the decode flag is True, then None
# Here is the logic table for this code, based on the email5.0.0 code:
# i decode is_multipart result
# ------ ------ ------------ ------------------------------
# None False True _payload (a list)
# i False True _payload element i (a Message)
# i False False error (not a list)
# i True False error (not a list)
# None False False _payload
# None True False _payload decoded (bytes)
# Note that Barry planned to factor out the 'decode' case, but that
# isn't so easy now that we handle the 8 bit data, which needs to be
# converted in both the decode and non-decode path.
# For backward compatibility, Use isinstance and this error message
# instead of the more logical is_multipart test.
if i is not None and not isinstance(self._payload, list):
raise TypeError('Expected list, got %s' % type(self._payload))
# cte might be a Header, so for now stringify it.
cte = str(self.get('content-transfer-encoding', '')).lower()
# payload may be bytes here.
if isinstance(payload, str):
if utils._has_surrogates(payload):
bpayload = payload.encode('ascii', 'surrogateescape')
payload = bpayload.decode(self.get_param('charset', 'ascii'), 'replace')
payload = bpayload.decode('ascii', 'replace')
bpayload = payload.encode('ascii')
# This won't happen for RFC compliant messages (messages
# containing only ASCII code points in the unicode input).
# If it does happen, turn the string into bytes in a way
# guaranteed not to fail.
bpayload = payload.encode('raw-unicode-escape')
if cte == 'quoted-printable':
return quopri.decodestring(bpayload)
# XXX: this is a bit of a hack; decode_b should probably be factored
# out somewhere, but I haven't figured out where yet.
value, defects = decode_b(b''.join(bpayload.splitlines()))
self.policy.handle_defect(self, defect)
elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
in_file = BytesIO(bpayload)
uu.decode(in_file, out_file, quiet=True)
return out_file.getvalue()
if isinstance(payload, str):
def set_payload(self, payload, charset=None):
"""Set the payload to the given value.
Optional charset sets the message's default character set. See
set_charset() for details.
if hasattr(payload, 'encode'):
if not isinstance(charset, Charset):
charset = Charset(charset)
payload = payload.encode(charset.output_charset)
if hasattr(payload, 'decode'):
self._payload = payload.decode('ascii', 'surrogateescape')
self.set_charset(charset)
def set_charset(self, charset):
"""Set the charset of the payload to a given character set.
charset can be a Charset instance, a string naming a character set, or
None. If it is a string it will be converted to a Charset instance.
If charset is None, the charset parameter will be removed from the
Content-Type field. Anything else will generate a TypeError.
The message will be assumed to be of type text/* encoded with
charset.input_charset. It will be converted to charset.output_charset
and encoded properly, if needed, when generating the plain text
representation of the message. MIME headers (MIME-Version,
Content-Type, Content-Transfer-Encoding) will be added as needed.
self.del_param('charset')
if not isinstance(charset, Charset):
charset = Charset(charset)
if 'MIME-Version' not in self:
self.add_header('MIME-Version', '1.0')
if 'Content-Type' not in self:
self.add_header('Content-Type', 'text/plain',
charset=charset.get_output_charset())
self.set_param('charset', charset.get_output_charset())
if charset != charset.get_output_charset():
self._payload = charset.body_encode(self._payload)
if 'Content-Transfer-Encoding' not in self:
cte = charset.get_body_encoding()
# This 'if' is for backward compatibility, it allows unicode
# through even though that won't work correctly if the
payload = payload.encode('ascii', 'surrogateescape')
payload = payload.encode(charset.output_charset)
self._payload = charset.body_encode(payload)
self.add_header('Content-Transfer-Encoding', cte)
"""Return the Charset instance associated with the message's payload.
# MAPPING INTERFACE (partial)
"""Return the total number of headers, including duplicates."""
return len(self._headers)
def __getitem__(self, name):
Return None if the header is missing instead of raising an exception.
Note that if the header appeared multiple times, exactly which
occurrence gets returned is undefined. Use get_all() to get all
the values matching a header field name.
def __setitem__(self, name, val):
"""Set the value of a header.
Note: this does not overwrite an existing header with the same field
name. Use __delitem__() first to delete any existing headers.
max_count = self.policy.header_max_count(name)
for k, v in self._headers:
raise ValueError("There may be at most {} {} headers "
"in a message".format(max_count, name))
self._headers.append(self.policy.header_store_parse(name, val))
def __delitem__(self, name):
"""Delete all occurrences of a header, if present.
Does not raise an exception if the header is missing.
for k, v in self._headers:
newheaders.append((k, v))
self._headers = newheaders
def __contains__(self, name):
return name.lower() in [k.lower() for k, v in self._headers]
for field, value in self._headers:
"""Return a list of all the message's header field names.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
return [k for k, v in self._headers]
"""Return a list of all the message's header values.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
return [self.policy.header_fetch_parse(k, v)
for k, v in self._headers]
"""Get all the message's header fields and values.
These will be sorted in the order they appeared in the original
message, or were added to the message, and may contain duplicates.
Any fields deleted and re-inserted are always appended to the header
return [(k, self.policy.header_fetch_parse(k, v))
for k, v in self._headers]
def get(self, name, failobj=None):
Like __getitem__() but return failobj instead of None when the field
for k, v in self._headers:
return self.policy.header_fetch_parse(k, v)
# "Internal" methods (public API, but only intended for use by a parser
# or generator, not normal application code.
def set_raw(self, name, value):
"""Store name and value in the model without modification.
This is an "internal" API, intended only for use by a parser.
self._headers.append((name, value))
"""Return the (name, value) header pairs without modification.
This is an "internal" API, intended only for use by a generator.
return iter(self._headers.copy())
# Additional useful stuff
def get_all(self, name, failobj=None):
"""Return a list of all the values for the named field.
These will be sorted in the order they appeared in the original