# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""This module defines the data structures used to represent a grammar.

These are a bit arcane because they are derived from the data
structures used by Python's 'pgen' parser generator.

There's also a table here mapping operators to their names in the
token module; the Python tokenize module reports all operators as the
fallback token code OP, but the parser needs the actual token code.

"""

# Python imports
import collections
import pickle

# Local imports
from . import token, tokenize
class Grammar(object):
    """Pgen parsing tables conversion class.

    Once initialized, this class supplies the grammar tables for the
    parsing engine implemented by parse.py.  The parsing engine
    accesses the instance variables directly.  The class here does not
    provide initialization of the tables; several subclasses exist to
    do this (see the conv and pgen modules).

    The load() method reads the tables from a pickle file, which is
    much faster than the other ways offered by subclasses.  The pickle
    file is written by calling dump() (after loading the grammar
    tables using a subclass).  The report() method prints a readable
    representation of the tables to stdout, for debugging.

    The instance variables are as follows:

    symbol2number -- a dict mapping symbol names to numbers.  Symbol
                     numbers are always 256 or higher, to distinguish
                     them from token numbers, which are between 0 and
                     255 (inclusive).

    number2symbol -- a dict mapping numbers to symbol names;
                     these two are each other's inverse.

    states        -- a list of DFAs, where each DFA is a list of
                     states, each state is a list of arcs, and each
                     arc is a (i, j) pair where i is a label and j is
                     a state number.  The DFA number is the index into
                     this list.  (This name is slightly confusing.)
                     Final states are represented by a special arc of
                     the form (0, j) where j is its own state number.

    dfas          -- a dict mapping symbol numbers to (DFA, first)
                     pairs, where DFA is an item from the states list
                     above, and first is a set of tokens that can
                     begin this grammar rule (represented by a dict
                     whose values are always 1).

    labels        -- a list of (x, y) pairs where x is either a token
                     number or a symbol number, and y is either None
                     or a string; the strings are keywords.  The label
                     number is the index in this list; label numbers
                     are used to mark state transitions (arcs) in the
                     DFAs.

    start         -- the number of the grammar's start symbol.

    keywords      -- a dict mapping keyword strings to arc labels.

    tokens        -- a dict mapping token numbers to arc labels.

    """

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        # Label 0 is reserved as the special "EMPTY" label.
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        # symbol2label: presumably maps symbol names to arc-label numbers
        # (nonterminal counterpart of keywords/tokens) -- filled in by the
        # table-building subclasses, not here.
        self.symbol2label = {}
        self.tokens = {}
        # 256 is the lowest possible symbol number (see symbol2number above).
        self.start = 256

    def dump(self, filename):
        """Dump the grammar tables to a pickle file.

        dump() recursively changes all dict to OrderedDict, so the pickled file
        is not exactly the same as what was passed in to dump(). load() uses the
        pickled file to create the tables, but only changes OrderedDict to dict
        at the top level; it does not recursively change OrderedDict to dict.
        So, the loaded tables are different from the original tables that were
        passed to load() in that some of the OrderedDict (from the pickled file)
        are not changed back to dict. For parsing, this has no effect on
        performance because OrderedDict uses dict's __getitem__ with nothing in
        between.
        """
        with open(filename, "wb") as f:
            # Sort dict contents so the pickle bytes are reproducible.
            d = _make_deterministic(self.__dict__)
            pickle.dump(d, f, 2)

    def load(self, filename):
        """Load the grammar tables from a pickle file."""
        # Use a context manager so the file handle is closed even when
        # pickle.load() raises (the previous open/close pair leaked the
        # handle on error).
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)

    def loads(self, pkl):
        """Load the grammar tables from a pickle bytes object."""
        self.__dict__.update(pickle.loads(pkl))

    def copy(self):
        """
        Copy the grammar.

        Returns a new instance of the same class with shallow copies of
        the table dicts/lists; the table *contents* are shared.
        """
        new = self.__class__()
        for dict_attr in ("symbol2number", "number2symbol", "dfas", "keywords",
                          "tokens", "symbol2label"):
            setattr(new, dict_attr, getattr(self, dict_attr).copy())
        new.labels = self.labels[:]
        new.states = self.states[:]
        new.start = self.start
        return new

    def report(self):
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint
        # print(...) is used instead of the bare Python 2 print statement;
        # output is identical on Python 2 and this also parses on Python 3.
        print("s2n")
        pprint(self.symbol2number)
        print("n2s")
        pprint(self.number2symbol)
        print("states")
        pprint(self.states)
        print("dfas")
        pprint(self.dfas)
        print("labels")
        pprint(self.labels)
        print("start %s" % (self.start,))
def _make_deterministic(top):
[144] Fix | Delete
if isinstance(top, dict):
[145] Fix | Delete
return collections.OrderedDict(
[146] Fix | Delete
sorted(((k, _make_deterministic(v)) for k, v in top.iteritems())))
[147] Fix | Delete
if isinstance(top, list):
[148] Fix | Delete
return [_make_deterministic(e) for e in top]
[149] Fix | Delete
if isinstance(top, tuple):
[150] Fix | Delete
return tuple(_make_deterministic(e) for e in top)
[151] Fix | Delete
return top
[152] Fix | Delete
[153] Fix | Delete
[154] Fix | Delete
# Map from operator to number (since tokenize doesn't do this)
[155] Fix | Delete
[156] Fix | Delete
opmap_raw = """
[157] Fix | Delete
( LPAR
[158] Fix | Delete
) RPAR
[159] Fix | Delete
[ LSQB
[160] Fix | Delete
] RSQB
[161] Fix | Delete
: COLON
[162] Fix | Delete
, COMMA
[163] Fix | Delete
; SEMI
[164] Fix | Delete
+ PLUS
[165] Fix | Delete
- MINUS
[166] Fix | Delete
* STAR
[167] Fix | Delete
/ SLASH
[168] Fix | Delete
| VBAR
[169] Fix | Delete
& AMPER
[170] Fix | Delete
< LESS
[171] Fix | Delete
> GREATER
[172] Fix | Delete
= EQUAL
[173] Fix | Delete
. DOT
[174] Fix | Delete
% PERCENT
[175] Fix | Delete
` BACKQUOTE
[176] Fix | Delete
{ LBRACE
[177] Fix | Delete
} RBRACE
[178] Fix | Delete
@ AT
[179] Fix | Delete
@= ATEQUAL
[180] Fix | Delete
== EQEQUAL
[181] Fix | Delete
!= NOTEQUAL
[182] Fix | Delete
<> NOTEQUAL
[183] Fix | Delete
<= LESSEQUAL
[184] Fix | Delete
>= GREATEREQUAL
[185] Fix | Delete
~ TILDE
[186] Fix | Delete
^ CIRCUMFLEX
[187] Fix | Delete
<< LEFTSHIFT
[188] Fix | Delete
>> RIGHTSHIFT
[189] Fix | Delete
** DOUBLESTAR
[190] Fix | Delete
+= PLUSEQUAL
[191] Fix | Delete
-= MINEQUAL
[192] Fix | Delete
*= STAREQUAL
[193] Fix | Delete
/= SLASHEQUAL
[194] Fix | Delete
%= PERCENTEQUAL
[195] Fix | Delete
&= AMPEREQUAL
[196] Fix | Delete
|= VBAREQUAL
[197] Fix | Delete
^= CIRCUMFLEXEQUAL
[198] Fix | Delete
<<= LEFTSHIFTEQUAL
[199] Fix | Delete
>>= RIGHTSHIFTEQUAL
[200] Fix | Delete
**= DOUBLESTAREQUAL
[201] Fix | Delete
// DOUBLESLASH
[202] Fix | Delete
//= DOUBLESLASHEQUAL
[203] Fix | Delete
-> RARROW
[204] Fix | Delete
"""
[205] Fix | Delete
[206] Fix | Delete
# Build the operator-text -> token-number table from opmap_raw above.
# Blank lines are skipped; every other line is "<op> <TOKEN_NAME>".
opmap = {
    op_text: getattr(token, token_name)
    for op_text, token_name in
    (entry.split() for entry in opmap_raw.splitlines() if entry)
}