"""Locale support module.
The module provides low-level access to the C lib's locale APIs and adds
high-level number formatting APIs as well as a locale aliasing engine to
complement them.
The aliasing engine includes support for many commonly used locale names and
maps them to values suitable for passing to the C lib's setlocale() function. It
also includes default encodings for all supported locale names.
# keep a copy of the builtin str type, because 'str' name is overridden
# in globals by a function below
# If Python is built without Unicode support, the unicode type
# will not exist. Fake one.
# Try importing the _locale module.
# If this fails, fall back on a basic 'C' locale emulation.
# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
# trying the import. So __all__ is also fiddled at the end of the file.
# Public names exported by a star-import of this module.  Note that "str"
# here refers to this module's own function of that name, not the builtin.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
"setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
"str", "atof", "atoi", "format", "format_string", "currency",
"normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
"LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
""" localeconv() -> dict.
Returns numeric and monetary locale-specific parameters.
# 'C' locale default values
return {'grouping': [127],
def setlocale(category, value=None):
""" setlocale(integer,string=None) -> string.
Activates/queries locale processing.
if value not in (None, '', 'C'):
raise Error, '_locale emulation only supports "C" locale'
""" strcoll(string,string) -> int.
Compares two strings according to the locale.
""" strxfrm(string) -> string.
Returns a string that behaves for cmp locale-aware.
# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}
@functools.wraps(_localeconv)
d.update(_override_localeconv)
### Number formatting APIs
# Author: Martin von Loewis
# improved by Georg Brandl
# Iterate over grouping intervals
def _grouping_intervals(grouping):
for interval in grouping:
# if grouping is -1, we are done
# 0: re-use last group ad infinitum
if last_interval is None:
raise ValueError("invalid grouping")
#perform the grouping from right to left
def _group(s, monetary=False):
thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
grouping = conv[monetary and 'mon_grouping' or 'grouping']
right_spaces = s[len(stripped):]
for interval in _grouping_intervals(grouping):
if not s or s[-1] not in "0123456789":
# only non-digit characters remain (sign, spaces)
groups.append(s[-interval:])
left_spaces + thousands_sep.join(groups) + right_spaces,
len(thousands_sep) * (len(groups) - 1)
# Strip a given amount of excess padding from the given string
def _strip_padding(s, amount):
while amount and s[lpos] == ' ':
while amount and s[rpos] == ' ':
# Matches a single %-style conversion specifier: an optional "(key)" mapping
# key (named group "key"), optional flag/width/precision/length characters
# (named group "modifiers", matched non-greedily), and a final conversion
# character, which may be a literal "%".
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
def format(percent, value, grouping=False, monetary=False, *additional):
    """Return the locale-aware substitution of a single %? specifier.

    percent must consist of exactly one format specifier (as recognised by
    _percent_re) and nothing else.  value is the object to format; grouping
    and monetary are forwarded unchanged to _format().  additional is for
    format strings which contain one or more '*' modifiers.

    Raises ValueError if percent is not exactly one format specifier.
    """
    # The specifier must span the whole string: match() only anchors at the
    # start, so the matched length is compared against the input length.
    match = _percent_re.match(percent)
    if not match or len(match.group()) != len(percent):
        raise ValueError(("format() must be given exactly one %%char "
                          "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
def _format(percent, value, grouping=False, monetary=False, *additional):
formatted = percent % ((value,) + additional)
formatted = percent % value
# floats and decimal ints need special action!
if percent[-1] in 'eEfFgG':
parts = formatted.split('.')
parts[0], seps = _group(parts[0], monetary=monetary)
decimal_point = localeconv()[monetary and 'mon_decimal_point'
formatted = decimal_point.join(parts)
formatted = _strip_padding(formatted, seps)
elif percent[-1] in 'diu':
formatted, seps = _group(formatted, monetary=monetary)
formatted = _strip_padding(formatted, seps)
def format_string(f, val, grouping=False):
"""Formats a string in the same way that the % formatting would use,
but takes the current locale into account.
Grouping is applied if the third parameter is true."""
percents = list(_percent_re.finditer(f))
new_f = _percent_re.sub('%s', f)
if operator.isMappingType(val):
if perc.group()[-1]=='%':
new_val.append(format(perc.group(), val, grouping))
if not isinstance(val, tuple):
if perc.group()[-1]=='%':
starcount = perc.group('modifiers').count('*')
new_val.append(_format(perc.group(),
*val[i+1:i+1+starcount]))
def currency(val, symbol=True, grouping=False, international=False):
"""Formats val according to the currency settings
in the current locale."""
# check for illegal values
digits = conv[international and 'int_frac_digits' or 'frac_digits']
raise ValueError("Currency formatting is not possible using "
s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
# '<' and '>' are markers if the sign must be inserted between symbol and value
smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
s = smb + (separated and ' ' or '') + s
s = s + (separated and ' ' or '') + smb
sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
sign = conv[val<0 and 'negative_sign' or 'positive_sign']
# the default if nothing specified;
# this should be the most fitting sign position
return s.replace('<', '').replace('>', '')
"""Convert float to string, taking the locale into account."""
return format("%.12g", val)
def atof(string, func=float):
    """Parse a string as a float according to the locale settings.

    The locale's thousands separator is removed and its decimal point is
    replaced with '.', then the result is passed to func (float by default)
    and func's return value is returned.
    """
    conv = localeconv()
    # First, get rid of the grouping.
    ts = conv['thousands_sep']
    if ts:
        string = string.replace(ts, '')
    # Next, replace the decimal point with a dot.  An empty decimal point
    # must be skipped: replacing '' would insert '.' between all characters.
    dd = conv['decimal_point']
    if dd:
        string = string.replace(dd, '.')
    # Finally, parse the string.
    return func(string)
"Converts a string to an integer according to the locale settings."
s1 = format("%d", 123456789,1)
### Locale name aliasing engine
# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
# Store away the low-level version of setlocale.
# Avoid relying on the locale-dependent .lower() method
_ascii_lower_map = ''.join(
chr(x + 32 if x >= ord('A') and x <= ord('Z') else x)
def _replace_encoding(code, encoding):
langname = code[:code.index('.')]
# Convert the encoding to a C lib compatible encoding string
norm_encoding = encodings.normalize_encoding(encoding)
#print('norm encoding: %r' % norm_encoding)
norm_encoding = encodings.aliases.aliases.get(norm_encoding,
#print('aliased encoding: %r' % norm_encoding)
encoding = locale_encoding_alias.get(norm_encoding,
#print('found encoding %r' % encoding)
return langname + '.' + encoding
def normalize(localename):
""" Returns a normalized locale code for the given locale
The returned locale code is formatted for use with
If normalization fails, the original name is returned
If the given encoding is not known, the function defaults to
the default encoding for the locale code just like setlocale()
# Normalize the locale name and extract the encoding and modifier
if isinstance(localename, _unicode):
localename = localename.encode('ascii')
code = localename.translate(_ascii_lower_map)
# ':' is sometimes used as encoding delimiter.
code = code.replace(':', '.')
code, modifier = code.split('@', 1)
langname, encoding = code.split('.')[:2]
# First lookup: fullname (possibly with encoding and modifier)
norm_encoding = encoding.replace('-', '')
norm_encoding = norm_encoding.replace('_', '')
lang_enc += '.' + norm_encoding
lookup_name += '@' + modifier
code = locale_alias.get(lookup_name, None)
#print('first lookup failed')
# Second try: fullname without modifier (possibly with encoding)
code = locale_alias.get(lang_enc, None)
#print('lookup without modifier succeeded')
return code + '@' + modifier
if code.split('@', 1)[1].translate(_ascii_lower_map) == modifier:
#print('second lookup failed')
# Third try: langname (without encoding, possibly with modifier)
lookup_name += '@' + modifier
code = locale_alias.get(lookup_name, None)
#print('lookup without encoding succeeded')
return _replace_encoding(code, encoding)
code, modifier = code.split('@', 1)
return _replace_encoding(code, encoding) + '@' + modifier
# Fourth try: langname (without encoding and modifier)
code = locale_alias.get(langname, None)
#print('lookup without modifier and encoding succeeded')
return _replace_encoding(code, encoding) + '@' + modifier
code, defmod = code.split('@', 1)
if defmod.translate(_ascii_lower_map) == modifier:
return _replace_encoding(code, encoding) + '@' + defmod
def _parse_localename(localename):
""" Parses the locale code for localename and returns the
result as tuple (language code, encoding).
The localename is normalized and passed through the locale
alias engine. A ValueError is raised in case the locale name
The language code corresponds to RFC 1766. code and encoding
can be None in case the values cannot be determined or are
unknown to this implementation.
code = normalize(localename)
# Deal with locale modifiers
code, modifier = code.split('@', 1)
if modifier == 'euro' and '.' not in code:
# Assume Latin-9 for @euro locales. This is bogus,
# since some systems may use other encodings for these
# locales. Also, we ignore other modifiers.
return code, 'iso-8859-15'
return tuple(code.split('.')[:2])
raise ValueError, 'unknown locale: %s' % localename
def _build_localename(localetuple):
""" Builds a locale code from the given tuple (language code,
No aliasing or normalizing takes place.
language, encoding = localetuple
return language + '.' + encoding
def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
""" Tries to determine the default locale settings and returns
them as tuple (language code, encoding).
According to POSIX, a program which has not called