# Licensed to PSF under a Contributor Agreement.

"""Parser engine for the grammar tables generated by pgen.

The grammar table must be loaded first.

See Parser/parser.c in the Python distribution for additional info on
how this parsing engine works.

"""

[10] Fix | Delete

[11] Fix | Delete

# Local imports

[12] Fix | Delete

from . import token

[13] Fix | Delete

[14] Fix | Delete

class ParseError(Exception):
    """Exception to signal the parser is stuck.

    The exception message combines all four constructor arguments; each
    one is also kept as an attribute of the same name so that callers can
    inspect it without parsing the message.

    Attributes:
        msg: short description of the failure (e.g. "bad input").
        type: type of the token that could not be handled.
        value: value of the token that could not be handled.
        context: the context that was passed along with the token.
    """

    def __init__(self, msg, type, value, context):
        """Build the formatted message and record the offending token."""
        Exception.__init__(self, "%s: type=%r, value=%r, context=%r" %
                           (msg, type, value, context))
        self.msg = msg
        self.type = type
        self.value = value
        self.context = context

[24] Fix | Delete

[25] Fix | Delete

class Parser(object):
    """Parser engine.

    The proper usage sequence is:

    p = Parser(grammar, [converter])  # create instance
    p.setup([start])                  # prepare for parsing
    <for each input token>:
        if p.addtoken(...):           # parse a token; may raise ParseError
            break
    root = p.rootnode                 # root of abstract syntax tree

    A Parser instance may be reused by calling setup() repeatedly.

    A Parser instance contains state pertaining to the current token
    sequence, and should not be used concurrently by different threads
    to parse separate token sequences.

    See driver.py for how to get input tokens by tokenizing a file or
    string.

    Parsing is complete when addtoken() returns True; the root of the
    abstract syntax tree can then be retrieved from the rootnode
    instance variable.  When a syntax error occurs, addtoken() raises
    the ParseError exception.  There is no error recovery; the parser
    cannot be used after a syntax error was reported (but it can be
    reinitialized by calling setup()).

    """

    def __init__(self, grammar, convert=None):
        """Constructor.

        The grammar argument is a grammar.Grammar instance; see the
        grammar module for more information.

        The parser is not ready yet for parsing; you must call the
        setup() method to get it started.

        The optional convert argument is a function mapping concrete
        syntax tree nodes to abstract syntax tree nodes.  If not
        given, no conversion is done and the syntax tree produced is
        the concrete syntax tree.  If given, it must be a function of
        two arguments, the first being the grammar (a grammar.Grammar
        instance), and the second being the concrete syntax tree node
        to be converted.  The syntax tree is converted from the bottom
        up.

        A concrete syntax tree node is a (type, value, context, nodes)
        tuple, where type is the node type (a token or symbol number),
        value is None for symbols and a string for tokens, context is
        None or an opaque value used for error reporting (typically a
        (lineno, offset) pair), and nodes is a list of children for
        symbols, and None for tokens.

        An abstract syntax tree node may be anything; this is entirely
        up to the converter function.

        """
        self.grammar = grammar
        # Without a converter, nodes are passed through unchanged.
        self.convert = convert or (lambda grammar, node: node)

    def setup(self, start=None):
        """Prepare for parsing.

        This *must* be called before starting to parse.

        The optional argument is an alternative start symbol; it
        defaults to the grammar's start symbol.

        You can use a Parser instance to parse any number of programs;
        each time you call setup() the parser is reset to an initial
        state determined by the (implicit or explicit) start symbol.

        """
        if start is None:
            start = self.grammar.start
        # Each stack entry is a tuple: (dfa, state, node).
        # A node is a tuple: (type, value, context, children),
        # where children is a list of nodes or None, and context may be None.
        newnode = (start, None, None, [])
        stackentry = (self.grammar.dfas[start], 0, newnode)
        self.stack = [stackentry]
        self.rootnode = None
        # Attached to self.rootnode as .used_names in pop() when the root
        # node accepts attributes.
        self.used_names = set()

    def addtoken(self, type, value, context):
        """Add a token; return True iff this is the end of the program.

        Returns False when the token was shifted and the stack is not
        yet empty.

        Raises ParseError with the message "bad token" (from classify())
        when the token type has no label, "bad input" when the current
        state has no transition for the token and is not accepting, and
        "too much input" when the stack is emptied while this token is
        still unconsumed.
        """
        # Map from token to label
        ilabel = self.classify(type, value, context)
        # Loop until the token is shifted; may raise exceptions
        while True:
            dfa, state, node = self.stack[-1]
            states, first = dfa
            arcs = states[state]
            # Look for a state with this label
            for i, newstate in arcs:
                t, v = self.grammar.labels[i]
                if ilabel == i:
                    # Look it up in the list of labels
                    assert t < 256
                    # Shift a token; we're done with it
                    self.shift(type, value, newstate, context)
                    # Pop while we are in an accept-only state
                    state = newstate
                    while states[state] == [(0, state)]:
                        self.pop()
                        if not self.stack:
                            # Done parsing!
                            return True
                        dfa, state, node = self.stack[-1]
                        states, first = dfa
                    # Done with this token
                    return False
                elif t >= 256:
                    # See if it's a symbol and if we're in its first set
                    itsdfa = self.grammar.dfas[t]
                    itsstates, itsfirst = itsdfa
                    if ilabel in itsfirst:
                        # Push a symbol
                        self.push(t, self.grammar.dfas[t], newstate, context)
                        break  # To continue the outer while loop
            else:
                # The for loop finished without a shift or a push.
                if (0, state) in arcs:
                    # An accepting state, pop it and try something else
                    self.pop()
                    if not self.stack:
                        # Done parsing, but another token is input
                        raise ParseError("too much input",
                                         type, value, context)
                else:
                    # No success finding a transition
                    raise ParseError("bad input", type, value, context)

    def classify(self, type, value, context):
        """Turn a token into a label.  (Internal)

        NAME tokens are recorded in self.used_names and looked up in the
        grammar's keyword table first; any other token (or a NAME that
        is not a keyword) is looked up by type in the grammar's token
        table.

        Raises ParseError("bad token", ...) when the type has no label.
        """
        if type == token.NAME:
            # Keep a listing of all used names
            self.used_names.add(value)
            # Check for reserved words
            ilabel = self.grammar.keywords.get(value)
            if ilabel is not None:
                return ilabel
        ilabel = self.grammar.tokens.get(type)
        if ilabel is None:
            raise ParseError("bad token", type, value, context)
        return ilabel

    def shift(self, type, value, newstate, context):
        """Shift a token.  (Internal)

        Builds the leaf node (type, value, context, None), runs it
        through the converter, appends the result to the children of the
        node on top of the stack (unless the converter returned None),
        and moves the top stack entry to newstate.
        """
        dfa, state, node = self.stack[-1]
        newnode = (type, value, context, None)
        newnode = self.convert(self.grammar, newnode)
        if newnode is not None:
            node[-1].append(newnode)
        self.stack[-1] = (dfa, newstate, node)

    def push(self, type, newdfa, newstate, context):
        """Push a nonterminal.  (Internal)

        Moves the current top entry to newstate (the state to resume in
        once the nonterminal is popped) and pushes a fresh entry for the
        nonterminal: newdfa in state 0 with an empty node.
        """
        dfa, state, node = self.stack[-1]
        newnode = (type, None, context, [])
        self.stack[-1] = (dfa, newstate, node)
        self.stack.append((newdfa, 0, newnode))

    def pop(self):
        """Pop a nonterminal.  (Internal)

        Converts the finished node and, unless the converter returned
        None, either appends it to the children of the new top-of-stack
        node or, when the stack is now empty, stores it as self.rootnode.
        """
        popdfa, popstate, popnode = self.stack.pop()
        newnode = self.convert(self.grammar, popnode)
        if newnode is not None:
            if self.stack:
                dfa, state, node = self.stack[-1]
                node[-1].append(newnode)
            else:
                self.rootnode = newnode
                try:
                    self.rootnode.used_names = self.used_names
                except AttributeError:
                    # The root cannot carry attributes (e.g. the plain
                    # tuple produced when no converter is given); the
                    # names remain available as self.used_names.
                    pass

[200] Fix | Delete

[201] Fix | Delete