# Copyright 2006 Google, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
"""The grammar is taken from PatternGrammar.txt.

The compiler compiles a pattern to a pytree.*Pattern instance.
"""
__author__ = "Guido van Rossum <guido@python.org>"
from .pgen2 import driver, literals, token, tokenize, parse, grammar
class PatternSyntaxError(Exception):
    """Raised when a pattern string cannot be compiled.

    The compiler raises this when parsing the pattern fails and when the
    pattern names an invalid token or symbol.
    """
def tokenize_wrapper(input):
    """Tokenizes a string suppressing significant whitespace.

    Yields each 5-tuple produced by tokenize.generate_tokens() for the
    given string, except those whose type is NEWLINE, INDENT or DEDENT.
    """
    skip = set((token.NEWLINE, token.INDENT, token.DEDENT))
    tokens = tokenize.generate_tokens(StringIO.StringIO(input).readline)
    for quintuple in tokens:
        # Each token is (type, value, start, end, line_text); only the type
        # decides whether the token is passed through.
        tok_type, _value, _start, _end, _line_text = quintuple
        if tok_type not in skip:
            yield quintuple
class PatternCompiler(object):
def __init__(self, grammar_file=None):
    """Initializer.

    Takes an optional alternative filename for the pattern grammar.
    When grammar_file is None, pygram.pattern_grammar and
    pygram.pattern_symbols are used; otherwise the grammar is loaded from
    the given file and its symbols are built with pygram.Symbols.
    """
    if grammar_file is None:
        self.grammar = pygram.pattern_grammar
        self.syms = pygram.pattern_symbols
    else:
        self.grammar = driver.load_grammar(grammar_file)
        self.syms = pygram.Symbols(self.grammar)
    # The Python grammar/symbols are kept alongside the pattern grammar;
    # compile_basic() resolves symbol names against self.pysyms.
    self.pygrammar = pygram.python_grammar
    self.pysyms = pygram.python_symbols
    self.driver = driver.Driver(self.grammar, convert=pattern_convert)
def compile_pattern(self, input, debug=False, with_tree=False):
    """Compiles a pattern string to a nested pytree.*Pattern object.

    The string is tokenized with tokenize_wrapper() and parsed by
    self.driver.  A parse.ParseError is re-raised as PatternSyntaxError
    carrying the same message.  When with_tree is true the result is a
    (compiled pattern, parse tree root) tuple; otherwise only the compiled
    pattern is returned.
    """
    tokens = tokenize_wrapper(input)
    try:
        root = self.driver.parse_tokens(tokens, debug=debug)
    except parse.ParseError as e:
        raise PatternSyntaxError(str(e))
    compiled = self.compile_node(root)
    if with_tree:
        return compiled, root
    return compiled
def compile_node(self, node):
# Compiles a single parse-tree node of a pattern into a pytree pattern
# object, calling itself and compile_basic() for nested parts.
# NOTE(review): this method looks incomplete as shown -- the docstring below
# is never closed, `nodes`, `repeat` and `child` are read without a visible
# assignment, and several branches have no visible return. Confirm against
# the upstream file before editing the logic.
"""Compiles a node, recursively.
This is one big switch on the node type.
# XXX Optimize certain Wildcard-containing-Wildcard patterns
# A Matcher node is replaced by its first child before dispatching.
if node.type == self.syms.Matcher:
node = node.children[0] # Avoid unneeded recursion
# Alternatives: each compiled even-indexed child becomes its own one-element
# alternative of a WildcardPattern created with min=1, max=1.
if node.type == self.syms.Alternatives:
# Skip the odd children since they are just '|' tokens
alts = [self.compile_node(ch) for ch in node.children[::2]]
p = pytree.WildcardPattern([[a] for a in alts], min=1, max=1)
# Alternative: all children are compiled and passed together as the single
# alternative of a WildcardPattern created with min=1, max=1.
if node.type == self.syms.Alternative:
units = [self.compile_node(ch) for ch in node.children]
p = pytree.WildcardPattern([units], min=1, max=1)
# NegatedUnit: everything after the first child goes through compile_basic()
# and the result is wrapped in a NegatedPattern.
if node.type == self.syms.NegatedUnit:
pattern = self.compile_basic(node.children[1:])
p = pytree.NegatedPattern(pattern)
# Any node type not handled above is expected to be a Unit.
assert node.type == self.syms.Unit
# Three or more parts with an EQUAL token in second position -- presumably a
# `name=...` prefix; the code that handles it is not visible (TODO confirm).
if len(nodes) >= 3 and nodes[1].type == token.EQUAL:
# Detects a trailing Repeater node; the code that strips it off is not visible.
if len(nodes) >= 2 and nodes[-1].type == self.syms.Repeater:
# Now we've reduced it to: STRING | NAME [Details] | (...) | [...]
pattern = self.compile_basic(nodes, repeat)
assert repeat.type == self.syms.Repeater
children = repeat.children
# The token type of `child` selects the repetition bounds; the bounds used
# for STAR and PLUS are not visible in this view.
if child.type == token.STAR:
elif child.type == token.PLUS:
elif child.type == token.LBRACE:
# A brace repeater must end in RBRACE and have either 3 or 5 children.
assert children[-1].type == token.RBRACE
assert len(children) in (3, 5)
# children[1] supplies both bounds; children[3] then replaces max --
# presumably only in the 5-child form (the guarding condition is not visible).
min = max = self.get_int(children[1])
max = self.get_int(children[3])
# The optimized pattern is wrapped in a WildcardPattern carrying the bounds.
pattern = pattern.optimize()
pattern = pytree.WildcardPattern([[pattern]], min=min, max=max)
return pattern.optimize()
def compile_basic(self, nodes, repeat=None):
# Compiles the basic part of a unit from `nodes`; `repeat` defaults to None
# and is not read in the visible lines.
# NOTE(review): `node` and `value` are read below without a visible
# assignment and several branch headers appear to be missing. Confirm against
# the upstream file before editing the logic.
# Compile STRING | NAME [Details] | (...) | [...]
# STRING: the literal is evaluated with literals.evalString(), converted with
# unicode() (a Python 2 builtin), and becomes a LeafPattern whose type comes
# from _type_of_literal().
if node.type == token.STRING:
value = unicode(literals.evalString(node.value))
return pytree.LeafPattern(_type_of_literal(value), value)
elif node.type == token.NAME:
# Token names must be keys of TOKEN_MAP; the error message below indicates
# that details are rejected for tokens (its guarding condition is not visible).
if value not in TOKEN_MAP:
raise PatternSyntaxError("Invalid token: %r" % value)
raise PatternSyntaxError("Can't have details for token")
return pytree.LeafPattern(TOKEN_MAP[value])
# Names not starting with "_" are looked up as attributes of self.pysyms.
elif not value.startswith("_"):
type = getattr(self.pysyms, value, None)
raise PatternSyntaxError("Invalid symbol: %r" % value)
if nodes[1:]: # Details present
# The details content is nodes[1].children[1], compiled recursively.
content = [self.compile_node(nodes[1].children[1])]
return pytree.NodePattern(type, content)
# Presumably the parenthesized `(...)` form: compile the inner node -- TODO confirm.
return self.compile_node(nodes[1])
# Presumably the bracketed `[...]` form: the inner pattern is wrapped in a
# WildcardPattern with min=0, max=1 -- TODO confirm.
subpattern = self.compile_node(nodes[1])
return pytree.WildcardPattern([[subpattern]], min=0, max=1)
# NOTE(review): this NUMBER check does not fit the branches above; it looks
# like the body of the get_int() helper that compile_node() calls, whose
# `def` line is not visible here.
assert node.type == token.NUMBER
# Map named tokens to the type value for a LeafPattern
# (compile_basic() rejects names that are not keys of this map).
# NOTE(review): only the "NAME" entry is visible and the dict literal is not
# closed -- the remaining entries appear to be missing; confirm against the
# upstream file.
TOKEN_MAP = {"NAME": token.NAME,
def _type_of_literal(value):
# Picks the type passed to LeafPattern for a string literal; compile_basic()
# calls this with the evaluated literal text.
# NOTE(review): the leading `if` branch that this `elif` continues is not
# visible here, and neither is any fallback return -- confirm upstream.
elif value in grammar.opmap:
# Literals listed in grammar.opmap use the type stored there.
return grammar.opmap[value]
def pattern_convert(grammar, raw_node_info):
    """Builds a pytree Node or Leaf from raw parser node information.

    raw_node_info is unpacked as (type, value, context, children).  A Node
    is returned when children is truthy or the type is present in
    grammar.number2symbol; in every other case a Leaf is returned.
    """
    node_type, node_value, context, children = raw_node_info
    is_leaf = not children and node_type not in grammar.number2symbol
    if is_leaf:
        return pytree.Leaf(node_type, node_value, context=context)
    return pytree.Node(node_type, children, context=context)
def compile_pattern(pattern):
    """Compiles a pattern string with a freshly created PatternCompiler."""
    compiler = PatternCompiler()
    return compiler.compile_pattern(pattern)