Edit File by line

[0] Fix | Delete

# Author: Barry Warsaw

[1] Fix | Delete

# Contact: email-sig@python.org

[2] Fix | Delete

[3] Fix | Delete

"""Miscellaneous utilities."""

[4] Fix | Delete

[5] Fix | Delete

# Public names exported by a star-import of this module.
__all__ = [
    'collapse_rfc2231_value', 'decode_params', 'decode_rfc2231',
    'encode_rfc2231',
    'formataddr', 'formatdate', 'format_datetime',
    'getaddresses',
    'make_msgid', 'mktime_tz',
    'parseaddr', 'parsedate', 'parsedate_tz', 'parsedate_to_datetime',
    'unquote',
]

[22] Fix | Delete

[23] Fix | Delete

import os

[24] Fix | Delete

import re

[25] Fix | Delete

import time

[26] Fix | Delete

import random

[27] Fix | Delete

import socket

[28] Fix | Delete

import datetime

[29] Fix | Delete

import urllib.parse

[30] Fix | Delete

[31] Fix | Delete

from email._parseaddr import quote

[32] Fix | Delete

from email._parseaddr import AddressList as _AddressList

[33] Fix | Delete

from email._parseaddr import mktime_tz

[34] Fix | Delete

[35] Fix | Delete

from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz

[36] Fix | Delete

[37] Fix | Delete

# Intrapackage imports

[38] Fix | Delete

from email.charset import Charset

[39] Fix | Delete

[40] Fix | Delete

# String constants.  COMMASPACE joins header field values in getaddresses(),
# EMPTYSTRING joins parameter segments in decode_params(), and TICK is the
# delimiter decode_rfc2231() splits on.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
CRLF = '\r\n'
TICK = "'"

# A display name containing any of these characters is wrapped in double
# quotes by formataddr().
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() backslash-escapes inside a display name.
escapesre = re.compile(r'[\\"]')

[48] Fix | Delete

[49] Fix | Delete

def _has_surrogates(s):
    """Return True if s contains surrogate-escaped binary data."""
    # A string can always be encoded with the default codec (utf-8) unless
    # it contains surrogates, so a failed encode is the signal we look for.
    # This is also the fastest known check; see issue 11454 for timings.
    try:
        s.encode()
    except UnicodeEncodeError:
        return True
    return False

[59] Fix | Delete

[60] Fix | Delete

# How to deal with a string containing bytes before handing it to the

[61] Fix | Delete

# application through the 'normal' interface.

[62] Fix | Delete

def _sanitize(string):
    """Return string with surrogate-escaped bytes made safe for display.

    The escaped bytes are recovered and re-decoded as utf-8; anything that
    is not valid utf-8 becomes the unicode 'unknown' character.  Escaped
    bytes that happen to form valid utf-8 are decoded, even if they were
    invalid in the charset the source claimed to use.  That is acceptable:
    a defect was still registered for them.
    """
    return string.encode('utf-8', 'surrogateescape').decode('utf-8', 'replace')

[69] Fix | Delete

[70] Fix | Delete

[71] Fix | Delete

[72] Fix | Delete

# Helpers

[73] Fix | Delete

[74] Fix | Delete

def formataddr(pair, charset='utf-8'):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.  When the realname part is false, the
    email address is returned on its own, unmodified.

    charset is used to encode the realname when it is not pure ASCII.  It
    may be a str naming the character set or a Charset-like object that
    provides a header_encode method.  The default is 'utf-8'.

    A UnicodeError is raised if the email address is not ASCII.
    """
    realname, address = pair
    # The address MUST (per RFC) be ascii; encoding it is the validation.
    address.encode('ascii')
    if not realname:
        return address
    try:
        realname.encode('ascii')
    except UnicodeEncodeError:
        # Non-ASCII display name: emit it as an encoded word.
        codec = Charset(charset) if isinstance(charset, str) else charset
        return "%s <%s>" % (codec.header_encode(realname), address)
    # ASCII display name: quote it if it contains specials, and always
    # backslash-escape backslashes and double quotes.
    wrap = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrap, escaped, wrap, address)

[105] Fix | Delete

[106] Fix | Delete

[107] Fix | Delete

[108] Fix | Delete

def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    fieldvalues is an iterable of header field values.  Each value is
    converted with str() before parsing, so objects with a meaningful
    string form (for example email.header.Header instances) are accepted
    alongside plain strings.
    """
    # Join all values into one comma separated address list and parse it
    # in a single pass.
    joined = COMMASPACE.join(str(value) for value in fieldvalues)
    return _AddressList(joined).addresslist

[113] Fix | Delete

[114] Fix | Delete

[115] Fix | Delete

def _format_timetuple_and_zone(timetuple, zone):
    """Render a time tuple plus a zone string as an RFC 2822 date.

    timetuple is indexed like a struct_time (year, month, day, hour,
    minute, second, weekday, ...); zone is appended verbatim.
    """
    # The names are spelled out here rather than taken from the locale:
    # the format requires the English abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    weekday = day_names[timetuple[6]]
    day = timetuple[2]
    month = month_names[timetuple[1] - 1]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekday, day, month,
        timetuple[0], timetuple[3], timetuple[4], timetuple[5],
        zone)

[123] Fix | Delete

[124] Fix | Delete

def formatdate(timeval=None, localtime=False, usegmt=False):
    """Returns a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    Optional timeval if given is a floating point time value as accepted by
    gmtime() and localtime(), otherwise the current time is used.

    Optional localtime is a flag that when True, interprets timeval, and
    returns a date relative to the local timezone instead of UTC, properly
    taking daylight savings time into account.

    Optional argument usegmt means that the timezone is written out as
    an ascii string, not numeric one (so "GMT" instead of "+0000"). This
    is needed for HTTP, and is only used when localtime==False.
    """
    # Note: we cannot use strftime() because that honors the locale and RFC
    # 2822 requires that day and month names be the English abbreviations.
    if timeval is None:
        timeval = time.time()
    # Always start from an aware UTC datetime; this avoids the deprecated
    # datetime.utcfromtimestamp().
    dt = datetime.datetime.fromtimestamp(timeval, datetime.timezone.utc)
    if localtime:
        dt = dt.astimezone()
        # "GMT" only makes sense for UTC output.
        usegmt = False
    elif not usegmt:
        # A naive datetime makes format_datetime() emit the "-0000" zone.
        dt = dt.replace(tzinfo=None)
    return format_datetime(dt, usegmt)

[152] Fix | Delete

[153] Fix | Delete

def format_datetime(dt, usegmt=False):
    """Turn a datetime into a date string as specified in RFC 2822.

    A naive dt is rendered with the zone '-0000'; an aware dt is rendered
    with its numeric offset.

    If usegmt is True, dt must be an aware datetime with an offset of zero
    and 'GMT' is rendered instead of the normal +0000 required by RFC2822;
    otherwise ValueError is raised.  This is to support HTTP headers
    involving date stamps.
    """
    stamp = dt.timetuple()
    if not usegmt:
        zone = '-0000' if dt.tzinfo is None else dt.strftime("%z")
        return _format_timetuple_and_zone(stamp, zone)
    if dt.tzinfo is None or dt.tzinfo != datetime.timezone.utc:
        raise ValueError("usegmt option requires a UTC datetime")
    return _format_timetuple_and_zone(stamp, 'GMT')

[170] Fix | Delete

[171] Fix | Delete

[172] Fix | Delete

def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <142480216486.20800.16526388040877946887@nightshade.la.mastaler.com>

    The id is built from the current time, the process id and 64 random
    bits.  Optional idstring, if given, is appended (after a dot) to
    strengthen the uniqueness of the message id.  Optional domain, if
    given, provides the portion of the message id after the '@'; it
    defaults to the locally defined hostname.
    """
    stamp = int(time.time()*100)
    pid = os.getpid()
    nonce = random.getrandbits(64)
    suffix = '' if idstring is None else '.' + idstring
    host = socket.getfqdn() if domain is None else domain
    return '<%d.%d.%d%s@%s>' % (stamp, pid, nonce, suffix, host)

[193] Fix | Delete

[194] Fix | Delete

[195] Fix | Delete

def parsedate_to_datetime(data):
    """Convert an RFC 2822 style date string into a datetime.

    If the parsed date carries no usable zone offset (the parser reports
    the offset as None, as it does for '-0000'), a naive datetime is
    returned.  Otherwise the result is an aware datetime whose tzinfo is a
    fixed-offset timezone.

    Raises ValueError if data cannot be parsed as a date.
    """
    parsed = _parsedate_tz(data)
    if parsed is None:
        # _parsedate_tz() signals failure by returning None; report that
        # explicitly instead of failing on the unpacking below.
        raise ValueError('Invalid date value or format "%s"' % str(data))
    *dtuple, tz = parsed
    if tz is None:
        return datetime.datetime(*dtuple[:6])
    return datetime.datetime(*dtuple[:6],
            tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))

[201] Fix | Delete

[202] Fix | Delete

[203] Fix | Delete

def parseaddr(addr):
    """
    Split addr into its realname and email address parts.

    The result is the first (realname, email address) pair found in addr.
    If the parse fails and nothing is found, the 2-tuple ('', '') is
    returned instead.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')

[214] Fix | Delete

[215] Fix | Delete

[216] Fix | Delete

# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.

[217] Fix | Delete

def unquote(str):
    """Remove quotes from a string.

    A string wrapped in double quotes loses them and has its backslash
    escapes (for backslash and double quote) undone; a string wrapped in
    angle brackets just loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str

[225] Fix | Delete

[226] Fix | Delete

[227] Fix | Delete

[228] Fix | Delete

# RFC2231-related functions - parameter encoding and decoding

[229] Fix | Delete

def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes into charset, language and
    text.  When all three parts are present they are returned as a list;
    otherwise the tuple (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    return pieces if len(pieces) > 2 else (None, None, s)

[235] Fix | Delete

[236] Fix | Delete

[237] Fix | Delete

def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted (using charset, or 'ascii' when no charset
    is given).  If neither charset nor language is given, the quoted
    string is returned on its own.  Otherwise the result has the form
    charset'language'quoted-text, with the empty string standing in for a
    missing language.
    """
    quoted = urllib.parse.quote(s, safe='', encoding=charset or 'ascii')
    if charset is None and language is None:
        return quoted
    return "%s'%s'%s" % (charset, '' if language is None else language, quoted)

[250] Fix | Delete

[251] Fix | Delete

[252] Fix | Delete

# Matches parameter names of the form NAME*, NAME*N or NAME*N*, capturing
# the base name as 'name' and the optional segment number N as 'num'.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$',
    re.ASCII)

[254] Fix | Delete

[255] Fix | Delete

def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    The sequence itself is not modified.

    The first entry is passed through unchanged.  Every other plain
    parameter is returned as (name, '"value"') with the value re-quoted.
    Parameters split into RFC 2231 segments (name*, name*N, name*N*) are
    reassembled in segment order; if any segment was %-encoded the value
    is returned as (name, (charset, language, '"value"')), otherwise as
    (name, '"value"').

    Raises IndexError if params is empty.
    """
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Walk the remaining entries once instead of repeatedly popping from
    # the front of a copy, which is quadratic in the number of params.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        # Sort by segment number.  An unnumbered segment (num is None) is
        # ordered before numbered ones; comparing None with an int directly
        # would raise TypeError when a malformed header mixes both forms.
        continuations.sort(
            key=lambda c: (-1 if c[0] is None else c[0], c[1], c[2]))
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        segments = []
        extended = False
        for _, s, encoded in continuations:
            if encoded:
                # Decode as "latin-1", so the characters in s directly
                # represent the percent-encoded octet values.
                # collapse_rfc2231_value treats this as an octet sequence.
                s = urllib.parse.unquote(s, encoding="latin-1")
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params

[310] Fix | Delete

[311] Fix | Delete

def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a decoded RFC 2231 parameter value into a plain string.

    If value is not a 3-tuple it is simply unquoted and returned.
    Otherwise it is taken as (charset, language, text): the characters of
    text are reinterpreted as raw bytes and decoded with charset, using
    errors as the error handler.  fallback_charset is used when charset is
    None, and if the charset is not a known codec the unquoted text is
    returned instead.
    """
    is_triple = isinstance(value, tuple) and len(value) == 3
    if not is_triple:
        return unquote(value)
    charset, _language, text = value
    # Issue 17369: if charset/lang is None, decode_rfc2231 couldn't parse
    # the value, so use the fallback_charset.
    codec = fallback_charset if charset is None else charset
    # While the text comes to us as a unicode string, we need it to be a
    # bytes object.  We do not want a normal utf-8 encoding here; we want
    # a straight interpretation of the string as character bytes.
    octets = bytes(text, 'raw-unicode-escape')
    try:
        return octets.decode(codec, errors)
    except LookupError:
        # charset is not a known codec.
        return unquote(text)

[329] Fix | Delete

[330] Fix | Delete

[331] Fix | Delete

[332] Fix | Delete

# datetime doesn't provide a localtime function yet, so provide one. Code

[333] Fix | Delete

# adapted from the patch in issue 9527. This may not be perfect, but it is

[334] Fix | Delete

# better than not having it.

[335] Fix | Delete

[336] Fix | Delete

[337] Fix | Delete

def localtime(dt=None, isdst=-1):
    """Return local time as an aware datetime object.

    If called without arguments, return current time.  Otherwise *dt*
    argument should be a datetime instance, and it is converted to the
    local time zone according to the system time zone database.  If *dt* is
    naive (that is, dt.tzinfo is None), it is assumed to be in local time.
    In this case, a positive or zero value for *isdst* causes localtime to
    presume initially that summer time (for example, Daylight Saving Time)
    is or is not (respectively) in effect for the specified time.  A
    negative value for *isdst* causes the localtime() function to attempt
    to divine whether summer time is in effect for the specified time.
    """
    if dt is None:
        # No argument: take the current UTC time and convert it.
        return datetime.datetime.now(datetime.timezone.utc).astimezone()
    if dt.tzinfo is not None:
        # Already aware: a plain conversion is all that is needed.
        return dt.astimezone()
    # We have a naive datetime.  Convert to a (localtime) timetuple and pass to
    # system mktime together with the isdst hint.  System mktime will return
    # seconds since epoch.
    tm = dt.timetuple()[:-1] + (isdst,)
    seconds = time.mktime(tm)
    localtm = time.localtime(seconds)
    try:
        # Prefer the offset and zone name carried by the struct_time itself;
        # the AttributeError branch covers struct_time objects without them.
        delta = datetime.timedelta(seconds=localtm.tm_gmtoff)
        tz = datetime.timezone(delta, localtm.tm_zone)
    except AttributeError:
        # Compute UTC offset and compare with the value implied by tm_isdst.
        # If the values match, use the zone name implied by tm_isdst.
        delta = dt - datetime.datetime(*time.gmtime(seconds)[:6])
        dst = time.daylight and localtm.tm_isdst > 0
        gmtoff = -(time.altzone if dst else time.timezone)
        if delta == datetime.timedelta(seconds=gmtoff):
            tz = datetime.timezone(delta, time.tzname[dst])
        else:
            # Offset does not match either known zone: leave it unnamed.
            tz = datetime.timezone(delta)
    # Attach the computed zone without changing the wall-clock fields.
    return dt.replace(tzinfo=tz)

[375] Fix | Delete

[376] Fix | Delete