# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.  With
# Python 1.6, many of these functions are implemented as methods on the
# standard string object. They used to be implemented by a built-in module
# called strop, but strop is now obsolete itself.

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Emit a Python 3 compatibility warning at import time, then drop the helper
# so it does not leak into the module namespace.
from warnings import warnpy3k
warnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2)
del warnpy3k

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers
# Identity table: the 256 characters chr(0)..chr(255) in order.  Built with
# a single join so no loop variable is left behind in the module namespace.
_idmap = ''.join(map(chr, range(256)))

# Backward compatible names for exceptions
# All four legacy names are plain aliases of ValueError.
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.

    Delegates to the string method s.lower().

    """
    return s.lower()

# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.

    Delegates to the string method s.upper().

    """
    return s.upper()

# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.

    Delegates to the string method s.swapcase().

    """
    return s.swapcase()

# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.

    Delegates to the string method s.strip().

    """
    return s.strip()

# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.

    Delegates to the string method s.lstrip().

    """
    return s.lstrip()

# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.

    Delegates to the string method s.rstrip().

    """
    return s.rstrip()

# Split a string into a list of separator-delimited words
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is nonzero, at most maxsplit
    splits are performed.  If sep is not specified, any whitespace
    string is a separator.  Maxsplit defaults to 0, meaning no limit.

    (split and splitfields are synonymous)

    """
    # The string method treats 0 literally ("perform no splits"), whereas
    # this function documents 0 as "no limit"; translate it to the method's
    # no-limit value.
    if maxsplit == 0:
        maxsplit = -1
    return s.split(sep, maxsplit)
splitfields = split

# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join

# for a little bit of speed
# Module-level alias of the builtin apply().
_apply = apply

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    The extra arguments are forwarded unchanged to s.index().

    """
    return s.index(*args)

# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    The extra arguments are forwarded unchanged to s.rindex().

    """
    return s.rindex(*args)

# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    The extra arguments are forwarded unchanged to s.count().

    """
    return s.count(*args)

# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The extra arguments are forwarded unchanged to s.find().

    """
    return s.find(*args)

# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The extra arguments are forwarded unchanged to s.rfind().

    """
    return s.rfind(*args)

# for a bit of speed
# Module-level aliases of the builtin conversion types and of the plain
# string type.
_float = float
_int = int
_long = long
_StringType = type('')

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError if s is not a string, and ValueError (from
    float()) if s does not represent a number.

    """
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)

# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError if no argument is given or if s is not a
    string; conversion errors from int() propagate unchanged.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    return int(*args)

# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError if no argument is given or if s is not a
    string; conversion errors from long() propagate unchanged.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    return long(*args)

# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0:
        # Already at least as wide as the field: return s unchanged.
        return s
    return s + ' ' * n

# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0:
        # Already at least as wide as the field: return s unchanged.
        return s
    return ' ' * n + s

# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # Floor division keeps the pad count an integer even when "/" performs
    # true division.
    half = n // 2
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    A leading '+' or '-' sign stays in front of the inserted zeros.
    A non-string x is converted with repr() first.

    """
    if isinstance(x, str):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice rather than index so that an empty string is padded instead of
    # raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    The column restarts at zero after every newline.  A tabsize of
    zero or less removes tab characters without inserting spaces.

    """
    pieces = []     # output fragments, joined once at the end
    column = 0      # characters emitted since the last newline
    for c in s:
        if c == '\t':
            # Guard against tabsize == 0, which would divide by zero.
            if tabsize > 0:
                pad = tabsize - column % tabsize
                pieces.append(' ' * pad)
                column = column + pad
        elif c == '\n':
            pieces.append(c)
            column = 0
        else:
            pieces.append(c)
            column = column + 1
    return ''.join(pieces)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    """
    # Only pass the deletion set when there is one, so that string types
    # whose translate() takes a single argument still work.
    if deletions:
        return s.translate(table, deletions)
    return s.translate(table)

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.

    Delegates to the string method s.capitalize().

    """
    return s.capitalize()

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words on sep, capitalize each word,
    and join the capitalized words with sep.  When sep is omitted
    or None, words are split on runs of whitespace and joined with
    a single space, so leading, trailing and repeated whitespace
    is collapsed.

    """
    words = [word.capitalize() for word in s.split(sep)]
    return (sep or ' ').join(words)

# Construct a translation string
_idmapL = None      # lazily built list of the 256 identity characters
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Raises ValueError if the two arguments differ in length.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Build the identity mapping once and reuse it on later calls.
        _idmapL = [chr(i) for i in range(256)]
    table = _idmapL[:]      # work on a copy; the cached list stays pristine
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)

# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The string method treats a count of 0 literally ("replace nothing"),
    # whereas this function's default of 0 means "replace all"; translate
    # it to the method's no-limit value.
    if maxsplit == 0:
        maxsplit = -1
    return s.replace(old, new, maxsplit)

# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
try:
    ''.upper
except AttributeError:
    from stringold import *

# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    # Recompute letters from the imported lowercase/uppercase values.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions