File: tokenize.py
# Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation.
# All rights reserved.

"""Tokenization help for Python programs.

generate_tokens(readline) is a generator that breaks a stream of
text into Python tokens. It accepts a readline-like method which is called
repeatedly to get the next line of input (or "" for EOF). It generates
5-tuples with these members:

    the token type (see token.py)
    the token (a string)
    the starting (row, column) indices of the token (a 2-tuple of ints)
    the ending (row, column) indices of the token (a 2-tuple of ints)
    the original line (string)

It is designed to match the working of the Python tokenizer exactly, except
that it produces COMMENT tokens for comments and gives type OP for all
operators.

Older entry points:
    tokenize_loop(readline, tokeneater)
    tokenize(readline, tokeneater=printtoken)
are the same, except that instead of generating tokens, tokeneater is a
callback function to which the 5 fields described above are passed as 5
arguments each time a new token is found."""
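
# A minimal usage sketch (illustrative only, not part of the original module):
# feed generate_tokens() a readline callable, e.g. one built from an
# in-memory string, and unpack the 5-tuples it yields:
#
#     from StringIO import StringIO
#     for tok in generate_tokens(StringIO("x = 1 + 2\n").readline):
#         printtoken(*tok)       # prints type, text and (row, col) positions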

__author__ = 'Ka-Ping Yee <ping@lfw.org>'
__credits__ = \
    'GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, Skip Montanaro'

import string, re
from codecs import BOM_UTF8, lookup
from lib2to3.pgen2.token import *

from . import token
__all__ = [x for x in dir(token) if x[0] != '_'] + ["tokenize",
           "generate_tokens", "untokenize"]
del token

try:
    bytes
except NameError:
    # Support bytes type in Python <= 2.5, so 2to3 turns itself into
    # valid Python 3 code.
    bytes = str

def group(*choices): return '(' + '|'.join(choices) + ')'
def any(*choices): return group(*choices) + '*'
def maybe(*choices): return group(*choices) + '?'
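
# For reference (derived from the helpers above): group('a', 'b') produces
# '(a|b)', any('a', 'b') produces '(a|b)*', and maybe('a', 'b') produces
# '(a|b)?'; the token regexps below are composed from such fragments.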

Whitespace = r'[ \f\t]*'
Comment = r'#[^\r\n]*'
Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
Name = r'[a-zA-Z_]\w*'

Binnumber = r'0[bB][01]*'
Hexnumber = r'0[xX][\da-fA-F]*[lL]?'
Octnumber = r'0[oO]?[0-7]*[lL]?'
Decnumber = r'[1-9]\d*[lL]?'
Intnumber = group(Binnumber, Hexnumber, Octnumber, Decnumber)
Exponent = r'[eE][-+]?\d+'
Pointfloat = group(r'\d+\.\d*', r'\.\d+') + maybe(Exponent)
Expfloat = r'\d+' + Exponent
Floatnumber = group(Pointfloat, Expfloat)
Imagnumber = group(r'\d+[jJ]', Floatnumber + r'[jJ]')
Number = group(Imagnumber, Floatnumber, Intnumber)

# Tail end of ' string.
Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
# Tail end of " string.
Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
# Tail end of ''' string.
Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
# Tail end of """ string.
Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
Triple = group("[ubUB]?[rR]?'''", '[ubUB]?[rR]?"""')
# Single-line ' or " string.
String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')

# Because of leftmost-then-longest match semantics, be sure to put the
# longest operators first (e.g., if = came before ==, == would get
# recognized as two instances of =).
Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"<>", r"!=",
                 r"//=?", r"->",
                 r"[+\-*/%&@|^=<>]=?",
                 r"~")

Bracket = '[][(){}]'
Special = group(r'\r?\n', r'[:;.,`@]')
Funny = group(Operator, Bracket, Special)

PlainToken = group(Number, Funny, String, Name)
Token = Ignore + PlainToken

# First (or only) line of ' or " string.
ContStr = group(r"[uUbB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                group("'", r'\\\r?\n'),
                r'[uUbB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                group('"', r'\\\r?\n'))
PseudoExtras = group(r'\\\r?\n', Comment, Triple)
PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)

tokenprog, pseudoprog, single3prog, double3prog = map(
    re.compile, (Token, PseudoToken, Single3, Double3))
endprogs = {"'": re.compile(Single), '"': re.compile(Double),
            "'''": single3prog, '"""': double3prog,
            "r'''": single3prog, 'r"""': double3prog,
            "u'''": single3prog, 'u"""': double3prog,
            "b'''": single3prog, 'b"""': double3prog,
            "ur'''": single3prog, 'ur"""': double3prog,
            "br'''": single3prog, 'br"""': double3prog,
            "R'''": single3prog, 'R"""': double3prog,
            "U'''": single3prog, 'U"""': double3prog,
            "B'''": single3prog, 'B"""': double3prog,
            "uR'''": single3prog, 'uR"""': double3prog,
            "Ur'''": single3prog, 'Ur"""': double3prog,
            "UR'''": single3prog, 'UR"""': double3prog,
            "bR'''": single3prog, 'bR"""': double3prog,
            "Br'''": single3prog, 'Br"""': double3prog,
            "BR'''": single3prog, 'BR"""': double3prog,
            'r': None, 'R': None,
            'u': None, 'U': None,
            'b': None, 'B': None}

triple_quoted = {}
for t in ("'''", '"""',
          "r'''", 'r"""', "R'''", 'R"""',
          "u'''", 'u"""', "U'''", 'U"""',
          "b'''", 'b"""', "B'''", 'B"""',
          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
          "uR'''", 'uR"""', "UR'''", 'UR"""',
          "br'''", 'br"""', "Br'''", 'Br"""',
          "bR'''", 'bR"""', "BR'''", 'BR"""',):
    triple_quoted[t] = t
single_quoted = {}
for t in ("'", '"',
          "r'", 'r"', "R'", 'R"',
          "u'", 'u"', "U'", 'U"',
          "b'", 'b"', "B'", 'B"',
          "ur'", 'ur"', "Ur'", 'Ur"',
          "uR'", 'uR"', "UR'", 'UR"',
          "br'", 'br"', "Br'", 'Br"',
          "bR'", 'bR"', "BR'", 'BR"', ):
    single_quoted[t] = t

tabsize = 8

class TokenError(Exception): pass

class StopTokenizing(Exception): pass

def printtoken(type, token, start, end, line): # for testing
    (srow, scol) = start
    (erow, ecol) = end
    print "%d,%d-%d,%d:\t%s\t%s" % \
        (srow, scol, erow, ecol, tok_name[type], repr(token))

def tokenize(readline, tokeneater=printtoken):
    """
    The tokenize() function accepts two parameters: one representing the
    input stream, and one providing an output mechanism for tokenize().

    The first parameter, readline, must be a callable object which provides
    the same interface as the readline() method of built-in file objects.
    Each call to the function should return one line of input as a string.

    The second parameter, tokeneater, must also be a callable object. It is
    called once for each token, with five arguments, corresponding to the
    tuples generated by generate_tokens().
    """
    try:
        tokenize_loop(readline, tokeneater)
    except StopTokenizing:
        pass
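
# Illustrative sketch (not part of the original module): any callable with the
# same signature as printtoken() can serve as the tokeneater callback, e.g.
# one that merely collects identifier tokens:
#
#     names = []
#     def name_eater(type, token, start, end, line):
#         if type == NAME:
#             names.append(token)
#     tokenize(open("example.py").readline, name_eater)   # hypothetical path
#
# Raising StopTokenizing inside the callback ends tokenization early, since
# tokenize() catches that exception around tokenize_loop().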

# backwards compatible interface
def tokenize_loop(readline, tokeneater):
    for token_info in generate_tokens(readline):
        tokeneater(*token_info)

class Untokenizer:

    def __init__(self):
        self.tokens = []
        self.prev_row = 1
        self.prev_col = 0

    def add_whitespace(self, start):
        row, col = start
        assert row <= self.prev_row
        col_offset = col - self.prev_col
        if col_offset:
            self.tokens.append(" " * col_offset)

    def untokenize(self, iterable):
        for t in iterable:
            if len(t) == 2:
                self.compat(t, iterable)
                break
            tok_type, token, start, end, line = t
            self.add_whitespace(start)
            self.tokens.append(token)
            self.prev_row, self.prev_col = end
            if tok_type in (NEWLINE, NL):
                self.prev_row += 1
                self.prev_col = 0
        return "".join(self.tokens)

    def compat(self, token, iterable):
        startline = False
        indents = []
        toks_append = self.tokens.append
        toknum, tokval = token
        if toknum in (NAME, NUMBER):
            tokval += ' '
        if toknum in (NEWLINE, NL):
            startline = True
        for tok in iterable:
            toknum, tokval = tok[:2]

            if toknum in (NAME, NUMBER):
                tokval += ' '

            if toknum == INDENT:
                indents.append(tokval)
                continue
            elif toknum == DEDENT:
                indents.pop()
                continue
            elif toknum in (NEWLINE, NL):
                startline = True
            elif startline and indents:
                toks_append(indents[-1])
                startline = False
            toks_append(tokval)

cookie_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)')
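
# For reference: cookie_re matches PEP 263 style declarations such as
# "# -*- coding: utf-8 -*-" or "# coding=latin-1", capturing the encoding
# name; blank_re matches lines that are empty or contain only a comment.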

def _get_normal_name(orig_enc):
    """Imitates get_normal_name in tokenizer.c."""
    # Only care about the first 12 characters.
    enc = orig_enc[:12].lower().replace("_", "-")
    if enc == "utf-8" or enc.startswith("utf-8-"):
        return "utf-8"
    if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
       enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
        return "iso-8859-1"
    return orig_enc

def detect_encoding(readline):
    """
    The detect_encoding() function is used to detect the encoding that should
    be used to decode a Python source file. It requires one argument, readline,
    in the same way as the tokenize() generator.

    It will call readline a maximum of twice, and return the encoding used
    (as a string) and a list of any lines (left as bytes) it has read in.

    It detects the encoding from the presence of a UTF-8 BOM or an encoding
    cookie as specified in PEP 0263. If both a BOM and a cookie are present
    but disagree, a SyntaxError will be raised; if the encoding cookie names
    an invalid charset, a SyntaxError will also be raised. Note that if a
    UTF-8 BOM is found, 'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    bom_found = False
    encoding = None
    default = 'utf-8'
    def read_or_stop():
        try:
            return readline()
        except StopIteration:
            return bytes()

    def find_cookie(line):
        try:
            line_string = line.decode('ascii')
        except UnicodeDecodeError:
            return None
        match = cookie_re.match(line_string)
        if not match:
            return None
        encoding = _get_normal_name(match.group(1))
        try:
            codec = lookup(encoding)
        except LookupError:
            # This behaviour mimics the Python interpreter
            raise SyntaxError("unknown encoding: " + encoding)

        if bom_found:
            if codec.name != 'utf-8':
                # This behaviour mimics the Python interpreter
                raise SyntaxError('encoding problem: utf-8')
            encoding += '-sig'
        return encoding

    first = read_or_stop()
    if first.startswith(BOM_UTF8):
        bom_found = True
        first = first[3:]
        default = 'utf-8-sig'
    if not first:
        return default, []

    encoding = find_cookie(first)
    if encoding:
        return encoding, [first]
    if not blank_re.match(first):
        return default, [first]

    second = read_or_stop()
    if not second:
        return default, [first]

    encoding = find_cookie(second)
    if encoding:
        return encoding, [first, second]

    return default, [first, second]
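
# Illustrative sketch (not part of the original module): detect_encoding()
# expects a readline that returns byte strings, e.g. from a file opened in
# binary mode:
#
#     fp = open("example.py", "rb")                        # hypothetical path
#     encoding, consumed_lines = detect_encoding(fp.readline)
#
# consumed_lines holds the (at most two) lines already read while probing.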

def untokenize(iterable):
    """Transform tokens back into Python source code.

    Each element returned by the iterable must be a token sequence
    with at least two elements, a token number and token value. If
    only two tokens are passed, the resulting output is poor.

    Round-trip invariant for full input:
        Untokenized source will match input source exactly

    Round-trip invariant for limited input:
        # Output text will tokenize back to the input
        t1 = [tok[:2] for tok in generate_tokens(f.readline)]
        newcode = untokenize(t1)
        readline = iter(newcode.splitlines(1)).next
        t2 = [tok[:2] for tok in generate_tokens(readline)]
        assert t1 == t2
    """
    ut = Untokenizer()
    return ut.untokenize(iterable)

def generate_tokens(readline):
    """
    The generate_tokens() generator requires one argument, readline, which
    must be a callable object which provides the same interface as the
    readline() method of built-in file objects. Each call to the function
    should return one line of input as a string. Alternately, readline
    can be a callable function terminating with StopIteration:
        readline = open(myfile).next    # Example of alternate readline

    The generator produces 5-tuples with these members: the token type; the
    token string; a 2-tuple (srow, scol) of ints specifying the row and
    column where the token begins in the source; a 2-tuple (erow, ecol) of
    ints specifying the row and column where the token ends in the source;
    and the line on which the token was found. The line passed is the
    logical line; continuation lines are included.
    """
    lnum = parenlev = continued = 0
    namechars, numchars = string.ascii_letters + '_', '0123456789'
    contstr, needcont = '', 0
    contline = None
    indents = [0]

    while 1:                                   # loop over lines in stream
        try:
            line = readline()
        except StopIteration:
            line = ''
        lnum = lnum + 1
        pos, max = 0, len(line)

        if contstr:                            # continued string
            if not line:
                raise TokenError, ("EOF in multi-line string", strstart)
            endmatch = endprog.match(line)
            if endmatch:
                pos = end = endmatch.end(0)
                yield (STRING, contstr + line[:end],
                       strstart, (lnum, end), contline + line)
                contstr, needcont = '', 0
                contline = None
            elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
                yield (ERRORTOKEN, contstr + line,
                       strstart, (lnum, len(line)), contline)
                contstr = ''
                contline = None
                continue
            else:
                contstr = contstr + line
                contline = contline + line
                continue

        elif parenlev == 0 and not continued:  # new statement
            if not line: break
            column = 0
            while pos < max:                   # measure leading whitespace
                if line[pos] == ' ': column = column + 1
                elif line[pos] == '\t': column = (column//tabsize + 1)*tabsize
                elif line[pos] == '\f': column = 0
                else: break
                pos = pos + 1
            if pos == max: break

            if line[pos] in '#\r\n':           # skip comments or blank lines
                if line[pos] == '#':
                    comment_token = line[pos:].rstrip('\r\n')
                    nl_pos = pos + len(comment_token)
                    yield (COMMENT, comment_token,
                           (lnum, pos), (lnum, pos + len(comment_token)), line)
                    yield (NL, line[nl_pos:],
                           (lnum, nl_pos), (lnum, len(line)), line)
                else:
                    yield ((NL, COMMENT)[line[pos] == '#'], line[pos:],
                           (lnum, pos), (lnum, len(line)), line)
                continue

            if column > indents[-1]:           # count indents or dedents
                indents.append(column)
                yield (INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
            while column < indents[-1]:
                if column not in indents:
                    raise IndentationError(
                        "unindent does not match any outer indentation level",
                        ("<tokenize>", lnum, pos, line))
                indents = indents[:-1]
                yield (DEDENT, '', (lnum, pos), (lnum, pos), line)

        else:                                  # continued statement
            if not line:
                raise TokenError, ("EOF in multi-line statement", (lnum, 0))
            continued = 0

        while pos < max:
            pseudomatch = pseudoprog.match(line, pos)
            if pseudomatch:                                # scan for tokens
                start, end = pseudomatch.span(1)
                spos, epos, pos = (lnum, start), (lnum, end), end
                token, initial = line[start:end], line[start]

                if initial in numchars or \
                   (initial == '.' and token != '.'):      # ordinary number
                    yield (NUMBER, token, spos, epos, line)
                elif initial in '\r\n':
                    newline = NEWLINE
                    if parenlev > 0:
                        newline = NL
                    yield (newline, token, spos, epos, line)
                elif initial == '#':
                    assert not token.endswith("\n")
                    yield (COMMENT, token, spos, epos, line)
                elif token in triple_quoted:
                    endprog = endprogs[token]
                    endmatch = endprog.match(line, pos)
                    if endmatch:                           # all on one line
                        pos = endmatch.end(0)
                        token = line[start:pos]
                        yield (STRING, token, spos, (lnum, pos), line)
                    else:
                        strstart = (lnum, start)           # multiple lines
                        contstr = line[start:]
                        contline = line
                        break
                elif initial in single_quoted or \
                    token[:2] in single_quoted or \
                    token[:3] in single_quoted:
                    if token[-1] == '\n':                  # continued string
                        strstart = (lnum, start)
                        endprog = (endprogs[initial] or endprogs[token[1]] or
                                   endprogs[token[2]])
                        contstr, needcont = line[start:], 1
                        contline = line
                        break
                    else:                                  # ordinary string
                        yield (STRING, token, spos, epos, line)
                elif initial in namechars:                 # ordinary name
                    yield (NAME, token, spos, epos, line)
                elif initial == '\\':                      # continued stmt
                    # This yield is new; needed for better idempotency:
                    yield (NL, token, spos, (lnum, pos), line)
                    continued = 1
                else:
                    if initial in '([{': parenlev = parenlev + 1
                    elif initial in ')]}': parenlev = parenlev - 1
                    yield (OP, token, spos, epos, line)
            else:
                yield (ERRORTOKEN, line[pos],
                       (lnum, pos), (lnum, pos+1), line)
                pos = pos + 1

    for indent in indents[1:]:                 # pop remaining indent levels
        yield (DEDENT, '', (lnum, 0), (lnum, 0), '')
    yield (ENDMARKER, '', (lnum, 0), (lnum, 0), '')

if __name__ == '__main__':                     # testing
    import sys
    if len(sys.argv) > 1: tokenize(open(sys.argv[1]).readline)
    else: tokenize(sys.stdin.readline)