#! /usr/libexec/platform-python

"""The Tab Nanny despises ambiguous indentation. She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like. This will be addressed later.

__version__ = "6"

import os
import sys
import getopt
import tokenize

# process_tokens() treats tokenize.NL as a "junk" token type, so refuse to
# load against a tokenize module that does not define it.
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

# Module-level switches; main() increments them once per -v / -q option.
# check() prints extra progress and diagnostic detail when verbose is nonzero.
verbose = 0
# When nonzero (and verbose is zero), check() prints only the offending
# file's name instead of name, line number and line text.
filename_only = 0

def errprint(*args):
    """Write the arguments to standard error on a single line.

    Each argument is converted with str() and the results are separated
    by single spaces.  A trailing newline is always written, so calling
    errprint() with no arguments emits an empty line.
    """
    sys.stderr.write(" ".join(str(arg) for arg in args))
    sys.stderr.write("\n")

def main():
    """Script entry point: parse sys.argv and check every named path.

    Recognised options:
        -q  increment the module-level ``filename_only`` counter
        -v  increment the module-level ``verbose`` counter

    An option-parsing error, or an empty list of paths, is reported via
    errprint() and the function returns without checking anything.
    Otherwise each remaining argument is handed to check().
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for opt, _value in opts:
        if opt == '-q':
            filename_only += 1
        elif opt == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)

class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """

    def __init__(self, lineno, msg, line):
        """Record the line number, the explanatory message and the line text."""
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        """Return the line number passed to the constructor."""
        return self.lineno

    def get_msg(self):
        """Return the explanatory message passed to the constructor."""
        return self.msg

    def get_line(self):
        """Return the offending line text passed to the constructor."""
        return self.line

def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using the print() function.

    Failures to open or tokenize a file (OSError, tokenize.TokenError,
    IndentationError) are reported on standard error through errprint()
    and end the check of that file; they are not propagated to the caller.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) subdirectories, and check
            # anything whose name ends in ".py".
            is_real_dir = (os.path.isdir(fullname) and
                           not os.path.islink(fullname))
            if is_real_dir or os.path.normcase(name[-3:]) == ".py":
                check(fullname)
        return

    try:
        f = tokenize.open(file)
    except OSError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % (file,))

    # The with-block guarantees the file is closed on every exit path,
    # including the early returns in the handlers below.
    with f:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError as msg:
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except IndentationError as msg:
            errprint("%r: Indentation Error: %s" % (file, msg))
            return

        except NannyNag as nag:
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing a space so the terse output
                # stays unambiguous.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print(file, badline, repr(line))
            return

    if verbose:
        print("%r: Clean bill of health." % (file,))

class Whitespace:
    """Normalised description of the leading whitespace of a string."""

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Scan the leading spaces and tabs of ws and record the members above."""
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: leading whitespace chars;
        # nt: tabs seen so far.
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # Grow count so that index b exists, then tally one more
                # occurrence of "b spaces followed by a tab".
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-whitespace character ends the prefix
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the column this whitespace reaches for the given tab size."""
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i//tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i//ts + 1)*ts*count[i] =
        # trailing + ts * sum (i//ts + 1)*count[i] =
        # trailing + ts * sum i//ts*count[i] + count[i] =
        # trailing + ts * [(sum i//ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and note that i//ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = sum(i // tabsize * count[i]
                 for i in range(tabsize, len(count)))
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, n + 1):
            mine, theirs = self.indent_level(ts), other.indent_level(ts)
            if mine != theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        return all(self.indent_level(ts) < other.indent_level(ts)
                   for ts in range(2, n + 1))

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        witnesses = []
        for ts in range(1, n + 1):
            mine, theirs = self.indent_level(ts), other.indent_level(ts)
            if mine >= theirs:
                witnesses.append((ts, mine, theirs))
        return witnesses

def format_witnesses(w):
    """Return a phrase listing the tab sizes found in the witness list w.

    w is a sequence of tuples whose first item is a tab size, as built by
    Whitespace.not_equal_witness() and Whitespace.not_less_witness().
    The result reads "at tab size N" for a single witness and
    "at tab sizes N, M, ..." when there is more than one.
    """
    firsts = (str(tup[0]) for tup in w)
    prefix = "at tab size"
    if len(w) > 1:
        prefix += "s"
    return prefix + " " + ', '.join(firsts)

def process_tokens(tokens):
    """Scan a token stream and raise NannyNag on ambiguous indentation.

    tokens is an iterable of 5-tuples (type, token, start, end, line);
    check() supplies the output of tokenize.generate_tokens().  A stack of
    Whitespace objects tracks the indentation of the enclosing blocks.

    Raises NannyNag (carrying the line number, a message and the line
    text) when an INDENT is not greater than the enclosing indentation
    for every tab size, or when a statement's leading whitespace does not
    equal the indentation on top of the stack for every tab size.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)

# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()