Edit File by line

"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""

[5] Fix | Delete

[6] Fix | Delete

import string

[7] Fix | Delete

import keyword

[8] Fix | Delete

from idlelib import PyParse

[9] Fix | Delete

[10] Fix | Delete

class HyperParser:
    """Give structural information about the statement around a text index.

    An instance is built from an editor window and an index into its text
    widget.  It stores:

    rawtext        -- the analyzed source text, ending at ``stopatindex``.
    stopatindex    -- text-widget index of the end of the analyzed line.
    bracketing     -- sequence of ``(position, level)`` pairs obtained from
                      the parser's ``get_last_stmt_bracketing()``;
                      ``position`` is an offset into ``rawtext``.
    isopener       -- for each bracketing entry, whether its level is higher
                      than the previous entry's (i.e. it opens something).
    indexinrawtext -- offset in ``rawtext`` of the current index.
    indexbracket   -- position in ``bracketing`` of the entry the current
                      index belongs to.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide ``text``, ``indentwidth``, ``tabwidth``,
        ``context_use_ps1``, ``num_context_lines`` and
        ``_build_char_in_string_func``.  index is a text-widget index.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # A text index looks like "line.column"; keep the line part.
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try each context size in turn until the parser finds a good
            # place to start parsing, or the window reaches line 1.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous "console"-tagged range, or at
            # the beginning of the text when there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i > 0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index is the length
        # of rawtext minus the length of the text from index to the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry exactly at the index is included only when it is not
        # an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
            not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        before = self.indexbracket
        # Walk backwards to the nearest enclosing opener listed in openers.
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that opener's level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Text indices are computed relative to stopatindex, which is the
        # position corresponding to the end of rawtext.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
            self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # Ascii chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # Ascii chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # Ascii chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"
    # Names that keyword.iskeyword() may report as keywords but which are
    # valid starts of an expression (e.g. "True.real").
    _ID_KEYWORDS = frozenset(("True", "False", "None"))

    # Given a string and pos, return the number of chars in the
    # identifier which ends at pos, or 0 if there is no such one. Saved
    # words are not identifiers, except for those in _ID_KEYWORDS.
    # Chars before limit are never consumed.
    def _eat_identifier(self, str, limit, pos):
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if i < pos:
            candidate = str[i:pos]
            if (candidate[0] not in self._id_first_chars or
                    (keyword.iskeyword(candidate) and
                     candidate not in self._ID_KEYWORDS)):
                # Starts with a digit, or is a real keyword: not an
                # identifier.
                i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier or bracket may come next (scanning
        # backwards); False right after an identifier, when only a dot
        # lets the expression continue.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos > brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] not in "([":
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break
                # [] and () may be used after an identifier, so we
                # continue. postdot_phase is True, so we don't allow a dot.

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]

[249] Fix | Delete

[250] Fix | Delete

[251] Fix | Delete

# When executed as a script, run this module's unit tests.
if __name__ == '__main__':
    import unittest
    unittest.main('idlelib.idle_test.test_hyperparser', verbosity=2)

[254] Fix | Delete

[255] Fix | Delete