"""Locale support module.
The module provides low-level access to the C lib's locale APIs and adds high
level number formatting APIs as well as a locale aliasing engine to complement
these.

The aliasing engine includes support for many commonly used locale names and
maps them to values suitable for passing to the C lib's setlocale() function. It
also includes default encodings for all supported locale names.

"""
from builtins import str as _builtin_str
# Try importing the _locale module.
# If this fails, fall back on a basic 'C' locale emulation.
# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
# trying the import. So __all__ is also fiddled at the end of the file.
# Names exported by ``from <module> import *``, one per line and grouped by
# purpose.  The order of the entries is unchanged.
__all__ = [
    # locale queries and control
    "getlocale",
    "getdefaultlocale",
    "getpreferredencoding",
    "Error",
    "setlocale",
    "resetlocale",
    "localeconv",
    # collation
    "strcoll",
    "strxfrm",
    # number formatting and parsing
    "str",
    "atof",
    "atoi",
    "format",
    "format_string",
    "currency",
    # alias engine
    "normalize",
    # category constants and limits
    "LC_CTYPE",
    "LC_COLLATE",
    "LC_TIME",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_ALL",
    "CHAR_MAX",
]
""" strcoll(string,string) -> int.
Compares two strings according to the locale.
""" strxfrm(string) -> string.
Returns a string that behaves for cmp locale-aware.
""" localeconv() -> dict.
Returns numeric and monetary locale-specific parameters.
# 'C' locale default values
return {'grouping': [127],
def setlocale(category, value=None):
    """ setlocale(integer,string=None) -> string.
        Activates/queries locale processing.

        Emulation used when the C implementation is unavailable: only the
        "C" locale is supported.  *value* may be None (query), '' or 'C';
        any other value raises Error.  *category* is accepted for
        signature compatibility and is not inspected.  Always returns 'C'.
    """
    if value not in (None, '', 'C'):
        raise Error('_locale emulation only supports "C" locale')
    # Querying and "setting" both report the only locale we can emulate.
    return 'C'
# These may or may not exist in _locale, so be sure to set them.
# NOTE(review): both `if` statements below have no body in this copy of the
# file, so the fallback bindings for `strxfrm` and `strcoll` they are meant to
# guard are missing.  Restore the guarded assignments before using the module.
if 'strxfrm' not in globals():
if 'strcoll' not in globals():
# With this dict, you can override some items of localeconv's return value.
# This is useful for testing purposes.
_override_localeconv = {}

@functools.wraps(_localeconv)
def localeconv():
    """Return the low-level localeconv() dict with test overrides applied.

    The dict produced by ``_localeconv()`` is returned after the entries of
    ``_override_localeconv`` (empty by default) have been merged into it.
    """
    d = _localeconv()
    if _override_localeconv:
        # Overrides win over whatever the low-level call reported.
        d.update(_override_localeconv)
    return d
### Number formatting APIs
# Author: Martin von Loewis
# improved by Georg Brandl
# Iterate over grouping intervals
def _grouping_intervals(grouping):
    """Yield the successive digit-group sizes described by *grouping*.

    *grouping* is a sequence of ints as found under the 'grouping' /
    'mon_grouping' keys of localeconv().  CHAR_MAX ends the sequence, and a
    0 repeats the previous size forever (so callers must stop consuming on
    their own).

    Raises ValueError if a 0 appears before any real group size.
    """
    last_interval = None
    for interval in grouping:
        # if grouping is -1, we are done
        if interval == CHAR_MAX:
            return
        # 0: re-use last group ad infinitum
        if interval == 0:
            if last_interval is None:
                raise ValueError("invalid grouping")
            while True:
                yield last_interval
        yield interval
        last_interval = interval
#perform the grouping from right to left
def _group(s, monetary=False):
    """Insert the locale's thousands separator into the digit string *s*.

    *s* is an already formatted integer part, possibly padded with spaces
    and carrying a sign.  With *monetary* true the 'mon_*' entries of
    localeconv() are used instead of the numeric ones.

    Returns ``(grouped_string, added_length)``, where *added_length* is the
    number of characters the separators added, so the caller can remove the
    same amount of padding.  If the locale defines no grouping, *s* is
    returned unchanged with an added length of 0.
    """
    conv = localeconv()
    thousands_sep = conv['mon_thousands_sep' if monetary else 'thousands_sep']
    grouping = conv['mon_grouping' if monetary else 'grouping']
    if not grouping:
        return (s, 0)
    # Set trailing padding aside so that the digits end the string.
    if s.endswith(' '):
        stripped = s.rstrip()
        right_spaces = s[len(stripped):]
        s = stripped
    else:
        right_spaces = ''
    left_spaces = ''
    groups = []
    # perform the grouping from right to left
    for interval in _grouping_intervals(grouping):
        if not s or s[-1] not in "0123456789":
            # only non-digit characters remain (sign, spaces)
            left_spaces = s
            s = ''
            break
        groups.append(s[-interval:])
        s = s[:-interval]
    if s:
        # The grouping spec ran out first: the rest forms one leading group.
        groups.append(s)
    groups.reverse()
    return (
        left_spaces + thousands_sep.join(groups) + right_spaces,
        len(thousands_sep) * (len(groups) - 1)
    )
# Strip a given amount of excess padding from the given string
def _strip_padding(s, amount):
    """Remove up to *amount* padding spaces from *s*.

    Spaces are taken from the left end first and then from the right end,
    stopping once *amount* characters are removed or a non-space is reached
    on that side.  Returns the shortened string.
    """
    lpos = 0
    # The index guards keep an all-space or too-short string from raising
    # IndexError when *amount* exceeds the available padding.
    while amount and lpos < len(s) and s[lpos] == ' ':
        lpos += 1
        amount -= 1
    rpos = len(s) - 1
    while amount and rpos >= lpos and s[rpos] == ' ':
        rpos -= 1
        amount -= 1
    return s[lpos:rpos+1]
# Matches a single %-style conversion specifier: an optional "(key)" mapping
# key, a lazily matched run of flag/width/precision/length characters
# (captured as "modifiers"), and the conversion character itself.
_percent_re = re.compile(
    r'%(?:\((?P<key>.*?)\))?'
    r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]'
)
def format(percent, value, grouping=False, monetary=False, *additional):
    """Returns the locale-aware substitution of a %? specifier
    (percent).

    additional is for format strings which contain one or more
    '*' modifiers.

    percent must consist of exactly one format specifier; otherwise
    ValueError is raised.  grouping and monetary are passed on unchanged
    to the formatting helper."""
    # this is only for one-percent-specifier strings and this should be checked
    match = _percent_re.match(percent)
    if not match or len(match.group())!= len(percent):
        raise ValueError(("format() must be given exactly one %%char "
                         "format specifier, %s not valid") % repr(percent))
    return _format(percent, value, grouping, monetary, *additional)
def _format(percent, value, grouping=False, monetary=False, *additional):
    """Format *value* with the single specifier *percent*, locale-aware.

    *additional* supplies the extra arguments consumed by '*' modifiers.
    Float conversions get the locale's decimal point (the monetary one when
    *monetary* is true).  With *grouping* true, float and decimal-integer
    conversions also get thousands separators, and padding equal to the
    separators' length is stripped again so the field width is kept where
    possible.  Other conversions are returned as plain %-formatting output.
    """
    if additional:
        formatted = percent % ((value,) + additional)
    else:
        formatted = percent % value
    # floats and decimal ints need special action!
    if percent[-1] in 'eEfFgG':
        seps = 0
        parts = formatted.split('.')
        if grouping:
            parts[0], seps = _group(parts[0], monetary=monetary)
        decimal_point = localeconv()['mon_decimal_point' if monetary
                                     else 'decimal_point']
        formatted = decimal_point.join(parts)
        if seps:
            formatted = _strip_padding(formatted, seps)
    elif percent[-1] in 'diu':
        seps = 0
        if grouping:
            formatted, seps = _group(formatted, monetary=monetary)
        if seps:
            formatted = _strip_padding(formatted, seps)
    return formatted
def format_string(f, val, grouping=False):
    """Formats a string in the same way that the % formatting would use,
    but takes the current locale into account.
    Grouping is applied if the third parameter is true.

    val may be a single value, a tuple of values, or a mapping (for
    "%(key)s"-style specifiers)."""
    # The ABC lives in collections.abc; the bare ``collections.Mapping``
    # alias was removed in Python 3.10.
    from collections.abc import Mapping

    percents = list(_percent_re.finditer(f))
    # Each specifier is formatted on its own below; the template then only
    # has to splice the finished strings together.
    new_f = _percent_re.sub('%s', f)

    if isinstance(val, Mapping):
        new_val = []
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                new_val.append(format(perc.group(), val, grouping))
    else:
        if not isinstance(val, tuple):
            val = (val,)
        new_val = []
        i = 0
        for perc in percents:
            if perc.group()[-1]=='%':
                new_val.append('%')
            else:
                # Every '*' modifier consumes one extra positional value.
                starcount = perc.group('modifiers').count('*')
                new_val.append(_format(perc.group(),
                                       val[i],
                                       grouping,
                                       False,
                                       *val[i+1:i+1+starcount]))
                i += (1 + starcount)
    val = tuple(new_val)

    return new_f % val
def currency(val, symbol=True, grouping=False, international=False):
    """Formats val according to the currency settings
    in the current locale.

    symbol selects whether the currency symbol is included, grouping
    whether thousands separators are inserted, and international whether
    the international symbol and digit count are used.

    Raises ValueError if the locale provides no usable currency settings
    (as is the case for the 'C' locale)."""
    conv = localeconv()

    # check for illegal values
    digits = conv['int_frac_digits' if international else 'frac_digits']
    if digits == 127:
        raise ValueError("Currency formatting is not possible using "
                         "the 'C' locale.")

    s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
    # '<' and '>' are markers if the sign must be inserted between symbol and value
    s = '<' + s + '>'

    if symbol:
        smb = conv['int_curr_symbol' if international else 'currency_symbol']
        precedes = conv['n_cs_precedes' if val<0 else 'p_cs_precedes']
        separated = conv['n_sep_by_space' if val<0 else 'p_sep_by_space']

        if precedes:
            s = smb + (' ' if separated else '') + s
        else:
            s = s + (' ' if separated else '') + smb

    sign_pos = conv['n_sign_posn' if val<0 else 'p_sign_posn']
    sign = conv['negative_sign' if val<0 else 'positive_sign']

    if sign_pos == 0:
        s = '(' + s + ')'
    elif sign_pos == 1:
        s = sign + s
    elif sign_pos == 2:
        s = s + sign
    elif sign_pos == 3:
        s = s.replace('<', sign)
    elif sign_pos == 4:
        s = s.replace('>', sign)
    else:
        # the default if nothing specified;
        # this should be the most fitting sign position
        s = sign + s

    # Drop whichever markers were not replaced by the sign.
    return s.replace('<', '').replace('>', '')
"""Convert float to string, taking the locale into account."""
return format("%.12g", val)
"Parses a string as a normalized number according to the locale settings."
#First, get rid of the grouping
ts = conv['thousands_sep']
string = string.replace(ts, '')
#next, replace the decimal point with a dot
dd = conv['decimal_point']
string = string.replace(dd, '.')
def atof(string, func=float):
    """Parse *string* as a float, honouring the locale settings.

    The text is first passed through delocalize(); *func* (float by
    default) then converts the result and its return value is handed back.
    """
    normalized = delocalize(string)
    return func(normalized)
"Converts a string to an integer according to the locale settings."
return int(delocalize(string))
# NOTE(review): these statements sit at module level in this copy of the file,
# so they would run (and print) on import.  They read like the body of a small
# demo/self-test helper whose header is not present here — TODO confirm and
# restore the enclosing definition.
# Format an integer with grouping enabled, then parse it back with atoi().
s1 = format("%d", 123456789,1)
print(s1, "is", atoi(s1))
# NOTE(review): s1 is not reassigned before this atof() round-trip; a line
# producing a float string appears to be missing — verify.
print(s1, "is", atof(s1))
### Locale name aliasing engine
# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
# store away the low-level version of setlocale (it's
def _replace_encoding(code, encoding):
    """Return locale *code* with its encoding part replaced by *encoding*.

    *code* is "language[.encoding]"; anything from the first '.' on is
    dropped.  *encoding* is normalized through the codec alias table and
    then mapped to a C lib style name via ``locale_encoding_alias``.  If no
    mapping is found, the normalized codec name is used as is.
    """
    # A code without an encoding part is all language name.
    if '.' in code:
        langname = code[:code.index('.')]
    else:
        langname = code
    # Convert the encoding to a C lib compatible encoding string
    norm_encoding = encodings.normalize_encoding(encoding)
    #print('norm encoding: %r' % norm_encoding)
    norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
                                                  norm_encoding)
    #print('aliased encoding: %r' % norm_encoding)
    encoding = norm_encoding
    norm_encoding = norm_encoding.lower()
    if norm_encoding in locale_encoding_alias:
        encoding = locale_encoding_alias[norm_encoding]
    else:
        # Retry with the separators removed, e.g. 'iso_8859_1' -> 'iso88591'.
        norm_encoding = norm_encoding.replace('_', '')
        norm_encoding = norm_encoding.replace('-', '')
        if norm_encoding in locale_encoding_alias:
            encoding = locale_encoding_alias[norm_encoding]
    #print('found encoding %r' % encoding)
    return langname + '.' + encoding
def _append_modifier(code, modifier):
    """Return locale *code* combined with *modifier*.

    The 'euro' modifier is expressed through the encoding where possible:
    a code without encoding gets '.ISO8859-15', a code already using
    ISO8859-15 or UTF-8 is returned unchanged, and ISO8859-1 is upgraded to
    ISO8859-15.  In every other case '@' + modifier is appended.
    """
    if modifier == 'euro':
        if '.' not in code:
            return code + '.ISO8859-15'
        _, _, encoding = code.partition('.')
        if encoding in ('ISO8859-15', 'UTF-8'):
            # These encodings already contain the euro sign.
            return code
        if encoding == 'ISO8859-1':
            return _replace_encoding(code, 'ISO8859-15')
    return code + '@' + modifier
def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding and modifier
    code = localename.lower()
    if ':' in code:
        # ':' is sometimes used as encoding delimiter.
        code = code.replace(':', '.')
    if '@' in code:
        code, modifier = code.split('@', 1)
    else:
        modifier = ''
    if '.' in code:
        langname, encoding = code.split('.')[:2]
    else:
        langname = code
        encoding = ''

    # First lookup: fullname (possibly with encoding and modifier)
    lang_enc = langname
    if encoding:
        norm_encoding = encoding.replace('-', '')
        norm_encoding = norm_encoding.replace('_', '')
        lang_enc += '.' + norm_encoding
    lookup_name = lang_enc
    if modifier:
        lookup_name += '@' + modifier
    code = locale_alias.get(lookup_name, None)
    if code is not None:
        return code
    #print('first lookup failed')

    if modifier:
        # Second try: fullname without modifier (possibly with encoding)
        code = locale_alias.get(lang_enc, None)
        if code is not None:
            #print('lookup without modifier succeeded')
            if '@' not in code:
                return _append_modifier(code, modifier)
            if code.split('@', 1)[1].lower() == modifier:
                return code
        #print('second lookup failed')

    if encoding:
        # Third try: langname (without encoding, possibly with modifier)
        lookup_name = langname
        if modifier:
            lookup_name += '@' + modifier
        code = locale_alias.get(lookup_name, None)
        if code is not None:
            #print('lookup without encoding succeeded')
            if '@' not in code:
                return _replace_encoding(code, encoding)
            code, modifier = code.split('@', 1)
            return _replace_encoding(code, encoding) + '@' + modifier

        if modifier:
            # Fourth try: langname (without encoding and modifier)
            code = locale_alias.get(langname, None)
            if code is not None:
                #print('lookup without modifier and encoding succeeded')
                if '@' not in code:
                    code = _replace_encoding(code, encoding)
                    return _append_modifier(code, modifier)
                code, defmod = code.split('@', 1)
                if defmod.lower() == modifier:
                    return _replace_encoding(code, encoding) + '@' + defmod

    # Nothing matched: hand the caller's name back untouched.
    return localename
def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '@' in code:
        # Deal with locale modifiers
        code, modifier = code.split('@', 1)
        if modifier == 'euro' and '.' not in code:
            # Assume Latin-9 for @euro locales. This is bogus,
            # since some systems may use other encodings for these
            # locales. Also, we ignore other modifiers.
            return code, 'iso-8859-15'

    if '.' in code:
        return tuple(code.split('.')[:2])
    elif code == 'C':
        # The "C" locale has neither a language code nor an encoding.
        return None, None
    raise ValueError('unknown locale: %s' % localename)
def _build_localename(localetuple):
""" Builds a locale code from the given tuple (language code,
No aliasing or normalizing takes place.