pos += len(comment_token)
yield TokenInfo(NL, line[pos:],
(lnum, pos), (lnum, len(line)), line)
if column > indents[-1]: # count indents or dedents
yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
while column < indents[-1]:
if column not in indents:
"unindent does not match any outer indentation level",
("<tokenize>", lnum, pos, line))
yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
else: # continued statement
raise TokenError("EOF in multi-line statement", (lnum, 0))
pseudomatch = _compile(PseudoToken).match(line, pos)
if pseudomatch: # scan for tokens
start, end = pseudomatch.span(1)
spos, epos, pos = (lnum, start), (lnum, end), end
token, initial = line[start:end], line[start]
if (initial in numchars or # ordinary number
(initial == '.' and token != '.' and token != '...')):
yield TokenInfo(NUMBER, token, spos, epos, line)
yield TokenInfo(NL, token, spos, epos, line)
yield TokenInfo(NEWLINE, token, spos, epos, line)
assert not token.endswith("\n")
yield TokenInfo(COMMENT, token, spos, epos, line)
elif token in triple_quoted:
endprog = _compile(endpats[token])
endmatch = endprog.match(line, pos)
if endmatch: # all on one line
yield TokenInfo(STRING, token, spos, (lnum, pos), line)
strstart = (lnum, start) # multiple lines
# Check up to the first 3 chars of the token to see if
# they're in the single_quoted set. If so, they start
# We're using the first 3, because we're looking for
# "rb'" (for example) at the start of the token. If
# we switch to longer prefixes, this needs to be
# Note that initial == token[:1].
# Also note that single quote checking must come after
# triple quote checking (above).
elif (initial in single_quoted or
token[:2] in single_quoted or
token[:3] in single_quoted):
if token[-1] == '\n': # continued string
# Again, using the first 3 chars of the
# token. This is looking for the matching end
# regex for the correct type of quote
# character. So it's really looking for
# endpats["'"] or endpats['"'], by trying to
# skip string prefix characters, if any.
endprog = _compile(endpats.get(initial) or
contstr, needcont = line[start:], 1
yield TokenInfo(STRING, token, spos, epos, line)
elif initial.isidentifier(): # ordinary name
yield TokenInfo(NAME, token, spos, epos, line)
elif initial == '\\': # continued stmt
yield TokenInfo(OP, token, spos, epos, line)
yield TokenInfo(ERRORTOKEN, line[pos],
(lnum, pos), (lnum, pos+1), line)
# Add an implicit NEWLINE if the input doesn't end in one
if last_line and last_line[-1] not in '\r\n':
yield TokenInfo(NEWLINE, '', (lnum - 1, len(last_line)), (lnum - 1, len(last_line) + 1), '')
for indent in indents[1:]: # pop remaining indent levels
yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
def generate_tokens(readline):
"""Tokenize a source reading Python code as unicode strings.
This has the same API as tokenize(), except that it expects the *readline*
callable to return str objects instead of bytes.
return _tokenize(readline, None)
# Helper error handling routines
sys.stderr.write(message)
def error(message, filename=None, location=None):
args = (filename,) + location + (message,)
perror("%s:%d:%d: error: %s" % args)
perror("%s: error: %s" % (filename, message))
perror("error: %s" % message)
# Parse the arguments and options
parser = argparse.ArgumentParser(prog='python -m tokenize')
parser.add_argument(dest='filename', nargs='?',
help='the file to tokenize; defaults to stdin')
parser.add_argument('-e', '--exact', dest='exact', action='store_true',
help='display token names using the exact type')
args = parser.parse_args()
with _builtin_open(filename, 'rb') as f:
tokens = list(tokenize(f.readline))
tokens = _tokenize(sys.stdin.readline, None)
# Output the tokenization
token_type = token.exact_type
token_range = "%d,%d-%d,%d:" % (token.start + token.end)
print("%-20s%-15s%-15r" %
(token_range, tok_name[token_type], token.string))
except IndentationError as err:
line, column = err.args[1][1:3]
error(err.args[0], filename, (line, column))
except TokenError as err:
line, column = err.args[1]
error(err.args[0], filename, (line, column))
except SyntaxError as err:
except KeyboardInterrupt:
perror("unexpected error: %s" % err)
if __name__ == "__main__":