"""Representing and manipulating email headers via custom objects.
This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.
Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.
from types import MappingProxyType
from email import _header_value_parser as parser
def __init__(self, display_name='', username='', domain='', addr_spec=None):
"""Create an object representing a full email address.
An address can have a 'display_name', a 'username', and a 'domain'. In
addition to specifying the username and domain separately, they may be
specified together by using the addr_spec keyword *instead of* the
username and domain keywords. If an addr_spec string is specified it
must be properly quoted according to RFC 5322 rules; an error will be
An Address object has display_name, username, domain, and addr_spec
attributes, all of which are read-only. The addr_spec and the string
value of the object are both quoted according to RFC5322 rules, but
without any Content Transfer Encoding.
# This clause with its potential 'raise' may only happen when an
# application program creates an Address object using an addr_spec
# keyword. The email library code itself must always supply username
if addr_spec is not None:
raise TypeError("addrspec specified when username and/or "
a_s, rest = parser.get_addr_spec(addr_spec)
raise ValueError("Invalid addr_spec; only '{}' "
"could be parsed from '{}'".format(
username = a_s.local_part
self._display_name = display_name
self._username = username
return self._display_name
"""The addr_spec (username@domain) portion of the address, quoted
according to RFC 5322 rules, but with no Content Transfer Encoding.
nameset = set(self.username)
if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
lp = parser.quote_string(self.username)
return lp + '@' + self.domain
return "{}(display_name={!r}, username={!r}, domain={!r})".format(
self.display_name, self.username, self.domain)
nameset = set(self.display_name)
if len(nameset) > len(nameset-parser.SPECIALS):
disp = parser.quote_string(self.display_name)
addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
return "{} <{}>".format(disp, addr_spec)
if type(other) != type(self):
return (self.display_name == other.display_name and
self.username == other.username and
self.domain == other.domain)
def __init__(self, display_name=None, addresses=None):
"""Create an object representing an address group.
An address group consists of a display_name followed by colon and a
list of addresses (see Address) terminated by a semi-colon. The Group
is created by specifying a display_name and a possibly empty list of
Address objects. A Group can also be used to represent a single
address that is not in a group, which is convenient when manipulating
lists that are a combination of Groups and individual Addresses. In
this case the display_name should be set to None. In particular, the
string representation of a Group whose display_name is None is the same
as the Address object, if there is one and only one Address object in
self._display_name = display_name
self._addresses = tuple(addresses) if addresses else tuple()
return self._display_name
return "{}(display_name={!r}, addresses={!r}".format(
self.display_name, self.addresses)
if self.display_name is None and len(self.addresses)==1:
return str(self.addresses[0])
if len(nameset) > len(nameset-parser.SPECIALS):
disp = parser.quote_string(disp)
adrstr = ", ".join(str(x) for x in self.addresses)
adrstr = ' ' + adrstr if adrstr else adrstr
return "{}:{};".format(disp, adrstr)
if type(other) != type(self):
return (self.display_name == other.display_name and
self.addresses == other.addresses)
"""Base class for message headers.
Implements generic behavior and provides tools for subclasses.
A subclass must define a classmethod named 'parse' that takes an unfolded
value string and a dictionary as its arguments. The dictionary will
contain one key, 'defects', initialized to an empty list. After the call
the dictionary must contain two additional keys: parse_tree, set to the
parse tree obtained from parsing the header, and 'decoded', set to the
string value of the idealized representation of the data from the value.
(That is, encoded words are decoded, and values that have canonical
representations are so represented.)
The defects key is intended to collect parsing defects, which the message
parser will subsequently dispose of as appropriate. The parser should not,
insofar as practical, raise any errors. Defects should be added to the
list instead. The standard header parsers register defects for RFC
compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
The parse method may add additional keys to the dictionary. In this case
the subclass must define an 'init' method, which will be passed the
dictionary as its keyword arguments. The method should use (usually by
setting them as the value of similarly named attributes) and remove all the
extra keys added by its parse method, and then use super to call its parent
class with the remaining arguments and keywords.
The subclass should also make sure that a 'max_count' attribute is defined
that is either None or 1. XXX: need to better define this API.
def __new__(cls, name, value):
if utils._has_surrogates(kwds['decoded']):
kwds['decoded'] = utils._sanitize(kwds['decoded'])
self = str.__new__(cls, kwds['decoded'])
def init(self, name, *, parse_tree, defects):
self._parse_tree = parse_tree
return tuple(self._defects)
self.__class__.__bases__,
def _reconstruct(cls, value):
return str.__new__(cls, value)
def fold(self, *, policy):
"""Fold header according to policy.
The parsed representation of the header is folded according to
RFC5322 rules, as modified by the policy. If the parse tree
contains surrogateescaped bytes, the bytes are CTE encoded using
the charset 'unknown-8bit".
Any non-ASCII characters in the parse tree are CTE encoded using
charset utf-8. XXX: make this a policy setting.
The returned value is an ASCII-only string possibly containing linesep
characters, and ending with a linesep character. The string includes
the header name and the ': ' separator.
# At some point we need to put fws here iif it was in the source.
parser.ValueTerminal(self.name, 'header-name'),
parser.ValueTerminal(':', 'header-sep')]),
parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
header.append(self._parse_tree)
return header.fold(policy=policy)
def _reconstruct_header(cls_name, bases, value):
return type(cls_name, bases, {})._reconstruct(value)
class UnstructuredHeader:
value_parser = staticmethod(parser.get_unstructured)
def parse(cls, value, kwds):
kwds['parse_tree'] = cls.value_parser(value)
kwds['decoded'] = str(kwds['parse_tree'])
class UniqueUnstructuredHeader(UnstructuredHeader):
"""Header whose value consists of a single timestamp.
Provides an additional attribute, datetime, which is either an aware
datetime using a timezone, or a naive datetime if the timezone
in the input string is -0000. Also accepts a datetime as input.
The 'value' attribute is the normalized form of the timestamp,
which means it is the output of format_datetime on the datetime.
# This is used only for folding, not for creating 'decoded'.
value_parser = staticmethod(parser.get_unstructured)
def parse(cls, value, kwds):
kwds['defects'].append(errors.HeaderMissingRequiredValue())
kwds['parse_tree'] = parser.TokenList()
if isinstance(value, str):
value = utils.parsedate_to_datetime(value)
kwds['decoded'] = utils.format_datetime(kwds['datetime'])
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
def init(self, *args, **kw):
self._datetime = kw.pop('datetime')
super().init(*args, **kw)
class UniqueDateHeader(DateHeader):
address_list, value = parser.get_address_list(value)
assert not value, 'this should not happen'
def parse(cls, value, kwds):
if isinstance(value, str):
# We are translating here from the RFC language (address/mailbox)
# to our API language (group/address).
kwds['parse_tree'] = address_list = cls.value_parser(value)
for addr in address_list.addresses:
groups.append(Group(addr.display_name,
[Address(mb.display_name or '',
for mb in addr.all_mailboxes]))
defects = list(address_list.all_defects)
# Assume it is Address/Group stuff
if not hasattr(value, '__iter__'):
groups = [Group(None, [item]) if not hasattr(item, 'addresses')
kwds['defects'] = defects
kwds['decoded'] = ', '.join([str(item) for item in groups])
if 'parse_tree' not in kwds:
kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
def init(self, *args, **kw):
self._groups = tuple(kw.pop('groups'))
super().init(*args, **kw)
if self._addresses is None:
self._addresses = tuple([address for group in self._groups
for address in group.addresses])
class UniqueAddressHeader(AddressHeader):
class SingleAddressHeader(AddressHeader):
if len(self.addresses)!=1:
raise ValueError(("value of single address header {} is not "
"a single address").format(self.name))
class UniqueSingleAddressHeader(SingleAddressHeader):
value_parser = staticmethod(parser.parse_mime_version)
def parse(cls, value, kwds):
kwds['parse_tree'] = parse_tree = cls.value_parser(value)
kwds['decoded'] = str(parse_tree)
kwds['defects'].extend(parse_tree.all_defects)
kwds['major'] = None if parse_tree.minor is None else parse_tree.major
kwds['minor'] = parse_tree.minor
if parse_tree.minor is not None:
kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
def init(self, *args, **kw):
self._version = kw.pop('version')
self._major = kw.pop('major')
self._minor = kw.pop('minor')
super().init(*args, **kw)
class ParameterizedMIMEHeader:
# Mixin that handles the params dict. Must be subclassed and
# a property value_parser for the specific header provided.
def parse(cls, value, kwds):
kwds['parse_tree'] = parse_tree = cls.value_parser(value)
kwds['decoded'] = str(parse_tree)
kwds['defects'].extend(parse_tree.all_defects)
if parse_tree.params is None:
# The MIME RFCs specify that parameter ordering is arbitrary.
kwds['params'] = {utils._sanitize(name).lower():
for name, value in parse_tree.params}
def init(self, *args, **kw):
self._params = kw.pop('params')
super().init(*args, **kw)
return MappingProxyType(self._params)
class ContentTypeHeader(ParameterizedMIMEHeader):
value_parser = staticmethod(parser.parse_content_type_header)
def init(self, *args, **kw):
super().init(*args, **kw)
self._maintype = utils._sanitize(self._parse_tree.maintype)
self._subtype = utils._sanitize(self._parse_tree.subtype)
return self.maintype + '/' + self.subtype
class ContentDispositionHeader(ParameterizedMIMEHeader):
value_parser = staticmethod(parser.parse_content_disposition_header)
def init(self, *args, **kw):
super().init(*args, **kw)
cd = self._parse_tree.content_disposition
self._content_disposition = cd if cd is None else utils._sanitize(cd)
def content_disposition(self):
return self._content_disposition