"""The Tab Nanny despises ambiguous indentation. She knows no mercy.
tabnanny -- Detection of ambiguous indentation
For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
# Released to the public domain, by Tim Peters, 15 April 1998.
# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.
if not hasattr(tokenize, 'NL'):
raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
__all__ = ["check", "NannyNag", "process_tokens"]
sys.stderr.write(sep + str(arg))
global verbose, filename_only
opts, args = getopt.getopt(sys.argv[1:], "qv")
except getopt.error, msg:
filename_only = filename_only + 1
errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
class NannyNag(Exception):
Raised by process_tokens() if detecting an ambiguous indent.
Captured and handled in check().
def __init__(self, lineno, msg, line):
self.lineno, self.msg, self.line = lineno, msg, line
If file_or_dir is a directory and not a symbolic link, then recursively
descend the directory tree named by file_or_dir, checking all .py files
along the way. If file_or_dir is an ordinary Python source file, it is
checked for whitespace related problems. The diagnostic messages are
written to standard output using the print statement.
if os.path.isdir(file) and not os.path.islink(file):
print "%r: listing directory" % (file,)
fullname = os.path.join(file, name)
if (os.path.isdir(fullname) and
not os.path.islink(fullname) or
os.path.normcase(name[-3:]) == ".py"):
errprint("%r: I/O Error: %s" % (file, msg))
print "checking %r ..." % file
process_tokens(tokenize.generate_tokens(f.readline))
except tokenize.TokenError, msg:
errprint("%r: Token Error: %s" % (file, msg))
except IndentationError, msg:
errprint("%r: Indentation Error: %s" % (file, msg))
badline = nag.get_lineno()
print "%r: *** Line %d: trouble in tab city! ***" % (file, badline)
print "offending line: %r" % (line,)
if ' ' in file: file = '"' + file + '"'
if filename_only: print file
else: print file, badline, repr(line)
print "%r: Clean bill of health." % (file,)
# the characters used for space and tab
# the number of leading whitespace characters in raw
# the number of tabs in raw[:n]
# the normal form as a pair (count, trailing), where:
# a tuple such that raw[:n] contains count[i]
# the number of trailing spaces in raw[:n]
# It's A Theorem that m.indent_level(t) ==
# n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
# true iff raw[:n] is of the form (T*)(S*)
S, T = Whitespace.S, Whitespace.T
count = count + [0] * (b - len(count) + 1)
self.norm = tuple(count), b
self.is_simple = len(count) <= 1
# return length of longest contiguous run of spaces (whether or not
def longest_run_of_spaces(self):
count, trailing = self.norm
return max(len(count)-1, trailing)
def indent_level(self, tabsize):
# for i in range(len(count)):
# il = il + (i/tabsize + 1)*tabsize * count[i]
# il = trailing + sum (i/ts + 1)*ts*count[i] =
# trailing + ts * sum (i/ts + 1)*count[i] =
# trailing + ts * sum i/ts*count[i] + count[i] =
# trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
# trailing + ts * [(sum i/ts*count[i]) + num_tabs]
# and note that i/ts*count[i] is 0 when i < ts
count, trailing = self.norm
for i in range(tabsize, len(count)):
il = il + i/tabsize * count[i]
return trailing + tabsize * (il + self.nt)
# return true iff self.indent_level(t) == other.indent_level(t)
return self.norm == other.norm
# return a list of tuples (ts, i1, i2) such that
# i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
# Intended to be used after not self.equal(other) is known, in which
# case it will return at least one witnessing tab size.
def not_equal_witness(self, other):
n = max(self.longest_run_of_spaces(),
other.longest_run_of_spaces()) + 1
if self.indent_level(ts) != other.indent_level(ts):
other.indent_level(ts)) )
# Return True iff self.indent_level(t) < other.indent_level(t)
# The algorithm is due to Vincent Broman.
# Easy to prove it's correct.
# Trivial to prove n is sharp (consider T vs ST).
# Unknown whether there's a faster general way. I suspected so at
# For the special (but common!) case where M and N are both of the
# form (T*)(S*), M.less(N) iff M.len() < N.len() and
# M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
# Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
if self.is_simple and other.is_simple:
return self.nt <= other.nt
n = max(self.longest_run_of_spaces(),
other.longest_run_of_spaces()) + 1
# the self.n >= other.n test already did it for ts=1
if self.indent_level(ts) >= other.indent_level(ts):
# return a list of tuples (ts, i1, i2) such that
# i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
# Intended to be used after not self.less(other) is known, in which
# case it will return at least one witnessing tab size.
def not_less_witness(self, other):
n = max(self.longest_run_of_spaces(),
other.longest_run_of_spaces()) + 1
if self.indent_level(ts) >= other.indent_level(ts):
other.indent_level(ts)) )
firsts = map(lambda tup: str(tup[0]), w)
return prefix + " " + ', '.join(firsts)
def process_tokens(tokens):
NEWLINE = tokenize.NEWLINE
JUNK = tokenize.COMMENT, tokenize.NL
indents = [Whitespace("")]
for (type, token, start, end, line) in tokens:
# a program statement, or ENDMARKER, will eventually follow,
# after some (possibly empty) run of tokens of the form
# (NL | COMMENT)* (INDENT | DEDENT+)?
# If an INDENT appears, setting check_equal is wrong, and will
# be undone when we see the INDENT.
thisguy = Whitespace(token)
if not indents[-1].less(thisguy):
witness = indents[-1].not_less_witness(thisguy)
msg = "indent not greater e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
# there's nothing we need to check here! what's important is
# that when the run of DEDENTs ends, the indentation of the
# program statement (or ENDMARKER) that triggered the run is
# equal to what's left at the top of the indents stack
# Ouch! This assert triggers if the last line of the source
# is indented *and* lacks a newline -- then DEDENTs pop out
# assert check_equal # else no earlier NEWLINE, or an earlier INDENT
elif check_equal and type not in JUNK:
# this is the first "real token" following a NEWLINE, so it
# must be the first token of the next program statement, or an
# ENDMARKER; the "line" argument exposes the leading whitespace
# for this statement; in the case of ENDMARKER, line is an empty
# string, so will properly match the empty string with which the
# "indents" stack was seeded
thisguy = Whitespace(line)
if not indents[-1].equal(thisguy):
witness = indents[-1].not_equal_witness(thisguy)
msg = "indent not equal e.g. " + format_witnesses(witness)
raise NannyNag(start[0], msg, line)
if __name__ == '__main__':