"""
The `ast` module helps Python applications to process trees of the Python
abstract syntax grammar.  The abstract syntax itself might change with
each Python release; this module helps to find out programmatically what
the current grammar looks like and allows modifications of it.

An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
a flag to the `compile()` builtin function or by using the `parse()`
function from this module.  The result will be a tree of objects whose
classes all inherit from `ast.AST`.

A modified abstract syntax tree can be compiled into a Python code object
using the built-in `compile()` function.

Additionally various helper functions are provided that make working with
the trees simpler.  The main intention of the helper functions and this
module in general is to provide an easy to use interface for libraries
that work tightly with the Python syntax (template engines for example).

:copyright: Copyright 2008 by Armin Ronacher.
:license: Python License.
"""
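
# Usage sketch (illustrative, editor-added; not part of the module). Assuming
# the standard `ast` module is importable, this shows the round trip described
# above: parse source into a tree, then compile the tree back to a code object.
import ast

tree = ast.parse("answer = 6 * 7", mode="exec")        # same tree as compile(..., PyCF_ONLY_AST)
code = compile(tree, filename="<ast>", mode="exec")
namespace = {}
exec(code, namespace)
assert namespace["answer"] == 42
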
def parse(source, filename='<unknown>', mode='exec', *,
type_comments=False, feature_version=None):
    """
    Parse the source into an AST node.
    Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
    Pass type_comments=True to get back type comments where the syntax allows.
    """
    flags = PyCF_ONLY_AST
    if type_comments:
        flags |= PyCF_TYPE_COMMENTS
    if isinstance(feature_version, tuple):
        major, minor = feature_version  # Should be a 2-tuple.
        assert major == 3
        feature_version = minor
    elif feature_version is None:
        feature_version = -1
    # Else it should be an int giving the minor version for 3.x.
    return compile(source, filename, mode, flags,
                   _feature_version=feature_version)
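
# Illustrative sketch (not module code): with type_comments=True the parser
# attaches "# type:" comments to the corresponding nodes.
import ast

mod = ast.parse("x = []  # type: list[int]", type_comments=True)
print(mod.body[0].type_comment)        # -> list[int]
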
def literal_eval(node_or_string):
    """
    Safely evaluate an expression node or a string containing a Python
    expression.  The string or node provided may only consist of the following
    Python literal structures: strings, bytes, numbers, tuples, lists, dicts,
    sets, booleans, and None.
    """
if isinstance(node_or_string, str):
node_or_string = parse(node_or_string, mode='eval')
if isinstance(node_or_string, Expression):
node_or_string = node_or_string.body
    def _raise_malformed_node(node):
        raise ValueError(f'malformed node or string: {node!r}')
    def _convert_num(node):
        if not isinstance(node, Constant) or type(node.value) not in (int, float, complex):
            _raise_malformed_node(node)
        return node.value
    def _convert_signed_num(node):
        if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)):
            operand = _convert_num(node.operand)
            if isinstance(node.op, UAdd):
                return + operand
            else:
                return - operand
        return _convert_num(node)
    def _convert(node):
        if isinstance(node, Constant):
            return node.value
        elif isinstance(node, Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, List):
            return list(map(_convert, node.elts))
        elif isinstance(node, Set):
            return set(map(_convert, node.elts))
        elif isinstance(node, Dict):
            if len(node.keys) != len(node.values):
                _raise_malformed_node(node)
            return dict(zip(map(_convert, node.keys),
                            map(_convert, node.values)))
        elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
            left = _convert_signed_num(node.left)
            right = _convert_num(node.right)
            if isinstance(left, (int, float)) and isinstance(right, complex):
                if isinstance(node.op, Add):
                    return left + right
                else:
                    return left - right
        return _convert_signed_num(node)
    return _convert(node_or_string)
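
# Example usage (editorial sketch): literal_eval() accepts only literal
# containers and numbers; anything else raises ValueError instead of running.
import ast

assert ast.literal_eval("{'a': [1, 2.5, 3+4j], 'b': None}") == {'a': [1, 2.5, 3+4j], 'b': None}
try:
    ast.literal_eval("__import__('os').system('echo hi')")
except ValueError:
    pass                                # non-literal input is rejected, not executed
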
def dump(node, annotate_fields=True, include_attributes=False):
    """
    Return a formatted dump of the tree in node.  This is mainly useful for
    debugging purposes.  If annotate_fields is true (by default),
    the returned string will show the names and the values for fields.
    If annotate_fields is false, the result string will be more compact by
    omitting unambiguous field names.  Attributes such as line
    numbers and column offsets are not dumped by default.  If this is wanted,
    include_attributes can be set to true.
    """
    def _format(node):
        if isinstance(node, AST):
            args = []
            keywords = annotate_fields
            for field in node._fields:
                try:
                    value = getattr(node, field)
                except AttributeError:
                    keywords = True
                else:
                    if keywords:
                        args.append('%s=%s' % (field, _format(value)))
                    else:
                        args.append(_format(value))
            if include_attributes and node._attributes:
                for a in node._attributes:
                    try:
                        args.append('%s=%s' % (a, _format(getattr(node, a))))
                    except AttributeError:
                        pass
            return '%s(%s)' % (node.__class__.__name__, ', '.join(args))
        elif isinstance(node, list):
            return '[%s]' % ', '.join(_format(x) for x in node)
        return repr(node)
    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)
    return _format(node)
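
# Illustrative sketch (not module code): dump() renders a tree for debugging.
import ast

print(ast.dump(ast.parse("x + 1", mode="eval")))
# roughly: Expression(body=BinOp(left=Name(id='x', ctx=Load()), op=Add(),
#                                right=Constant(value=1, ...)))
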
def copy_location(new_node, old_node):
    """Copy source location (`lineno`, `col_offset`, `end_lineno`, and `end_col_offset`
    attributes) from *old_node* to *new_node* if possible, and return *new_node*.
    """
for attr in 'lineno', 'col_offset', 'end_lineno', 'end_col_offset':
if attr in old_node._attributes and attr in new_node._attributes:
value = getattr(old_node, attr, None)
# end_lineno and end_col_offset are optional attributes, and they
# should be copied whether the value is None or not.
            if value is not None or (
                hasattr(old_node, attr) and attr.startswith("end_")
            ):
                setattr(new_node, attr, value)
    return new_node
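
# Example usage (editorial sketch): give a synthesized node the location of
# the node it replaces so that compile() accepts the tree.
import ast

tree = ast.parse("x = spam()")
call = tree.body[0].value                              # the original Call node
tree.body[0].value = ast.copy_location(ast.Constant(value=42), call)
code = compile(tree, "<ast>", "exec")                  # compiles: locations are present
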
def fix_missing_locations(node):
    """
    When you compile a node tree with compile(), the compiler expects lineno and
    col_offset attributes for every node that supports them.  This is rather
    tedious to fill in for generated nodes, so this helper adds these attributes
    recursively where not already set, by setting them to the values of the
    parent node.  It works recursively starting at *node*.
    """
def _fix(node, lineno, col_offset, end_lineno, end_col_offset):
        if 'lineno' in node._attributes:
            if not hasattr(node, 'lineno'):
                node.lineno = lineno
            else:
                lineno = node.lineno
        if 'end_lineno' in node._attributes:
            if not hasattr(node, 'end_lineno'):
                node.end_lineno = end_lineno
            else:
                end_lineno = node.end_lineno
        if 'col_offset' in node._attributes:
            if not hasattr(node, 'col_offset'):
                node.col_offset = col_offset
            else:
                col_offset = node.col_offset
        if 'end_col_offset' in node._attributes:
            if not hasattr(node, 'end_col_offset'):
                node.end_col_offset = end_col_offset
            else:
                end_col_offset = node.end_col_offset
        for child in iter_child_nodes(node):
            _fix(child, lineno, col_offset, end_lineno, end_col_offset)
    _fix(node, 1, 0, 1, 0)
    return node
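
# Illustrative sketch (not module code): generated nodes carry no locations,
# so fill them in from the parent before compiling.
import ast

tree = ast.parse("print('hello')")
tree.body.append(ast.Expr(value=ast.Call(
    func=ast.Name(id="print", ctx=ast.Load()),
    args=[ast.Constant(value="world")], keywords=[])))
ast.fix_missing_locations(tree)
exec(compile(tree, "<ast>", "exec"))                   # prints hello, then world
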
def increment_lineno(node, n=1):
    """
    Increment the line number and end line number of each node in the tree
    starting at *node* by *n*.  This is useful to "move code" to a different
    location in a file.
    """
    for child in walk(node):
        if 'lineno' in child._attributes:
            child.lineno = getattr(child, 'lineno', 0) + n
        if (
            "end_lineno" in child._attributes
            and (end_lineno := getattr(child, "end_lineno", 0)) is not None
        ):
            child.end_lineno = end_lineno + n
    return node
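
# Example usage (editorial sketch): shift a parsed snippet so its reported
# line numbers match where it will be spliced into a larger file.
import ast

snippet = ast.parse("y = 1")
ast.increment_lineno(snippet, n=10)
print(snippet.body[0].lineno)                          # -> 11
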
def iter_fields(node):
    """Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
    that is present on *node*."""
    for field in node._fields:
        try:
            yield field, getattr(node, field)
        except AttributeError:
            pass
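
# Illustrative sketch (not module code): list the fields of a BinOp node.
import ast

node = ast.parse("a + b", mode="eval").body
print([name for name, _ in ast.iter_fields(node)])     # -> ['left', 'op', 'right']
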
def iter_child_nodes(node):
    """
    Yield all direct child nodes of *node*, that is, all fields that are nodes
    and all items of fields that are lists of nodes.
    """
    for name, field in iter_fields(node):
        if isinstance(field, AST):
            yield field
        elif isinstance(field, list):
            for item in field:
                if isinstance(item, AST):
                    yield item
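
# Example usage (editorial sketch): direct children only, one level deep.
import ast

node = ast.parse("a + b * c", mode="eval").body
print([type(n).__name__ for n in ast.iter_child_nodes(node)])   # -> ['Name', 'Add', 'BinOp']
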
def get_docstring(node, clean=True):
    """
    Return the docstring for the given node or None if no docstring can
    be found.  If the node provided does not have docstrings a TypeError
    will be raised.
    If *clean* is `True`, all tabs are expanded to spaces and any whitespace
    that can be uniformly removed from the second line onwards is removed.
    """
    if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    if not(node.body and isinstance(node.body[0], Expr)):
        return None
    node = node.body[0].value
    if isinstance(node, Str):
        text = node.s
    elif isinstance(node, Constant) and isinstance(node.value, str):
        text = node.value
    else:
        return None
    if clean:
        import inspect
        text = inspect.cleandoc(text)
    return text
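
# Illustrative sketch (not module code): fetch the cleaned docstring of a
# parsed function definition.
import ast

mod = ast.parse("def f():\n    '''Say hi.'''\n")
print(ast.get_docstring(mod.body[0]))                  # -> Say hi.
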
def _splitlines_no_ff(source):
    """Split a string into lines ignoring form feed and other chars.
    This mimics how the Python parser splits source code.
    """
    lines = ['']
    for idx, c in enumerate(source):
        lines[-1] += c
        # keep '\r\n' together; only '\r' and '\n' terminate lines
        if c == '\n' or (c == '\r' and source[idx + 1:idx + 2] != '\n'):
            lines.append('')
    return [line for line in lines if line]
def _pad_whitespace(source):
    r"""Replace all chars except '\f\t' in a line with spaces."""
    return ''.join(c if c in '\f\t' else ' ' for c in source)
def get_source_segment(source, node, *, padded=False):
    """Get source code segment of the *source* that generated *node*.
    If some location information (`lineno`, `end_lineno`, `col_offset`,
    or `end_col_offset`) is missing, return None.
    If *padded* is `True`, the first line of a multi-line statement will
    be padded with spaces to match its original position.
    """
    try:
        if node.end_lineno is None or node.end_col_offset is None:
            return None
        lineno = node.lineno - 1
        end_lineno = node.end_lineno - 1
        col_offset = node.col_offset
        end_col_offset = node.end_col_offset
    except AttributeError:
        return None
    lines = _splitlines_no_ff(source)
    if end_lineno == lineno:
        return lines[lineno].encode()[col_offset:end_col_offset].decode()
    if padded:
        padding = _pad_whitespace(lines[lineno].encode()[:col_offset].decode())
    else:
        padding = ''
    first = padding + lines[lineno].encode()[col_offset:].decode()
    last = lines[end_lineno].encode()[:end_col_offset].decode()
    lines = lines[lineno+1:end_lineno]
    lines.insert(0, first)
    lines.append(last)
    return ''.join(lines)
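
# Example usage (editorial sketch): recover the exact source text behind a node.
import ast

src = "result = compute(a,\n                 b)"
call = ast.parse(src).body[0].value
print(ast.get_source_segment(src, call))               # -> the two-line "compute(...)" text
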
def walk(node):
    """Recursively yield all descendant nodes in the tree starting at *node*
    (including *node* itself), in no specified order.  This is useful if you
    only want to modify nodes in place and don't care about the context."""
    from collections import deque
    todo = deque([node])
    while todo:
        node = todo.popleft()
        todo.extend(iter_child_nodes(node))
        yield node
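
# Illustrative sketch (not module code): collect every name used in a module.
import ast

tree = ast.parse("total = price * qty")
names = {n.id for n in ast.walk(tree) if isinstance(n, ast.Name)}
print(sorted(names))                                   # -> ['price', 'qty', 'total']
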
class NodeVisitor(object):
    """
    A node visitor base class that walks the abstract syntax tree and calls a
    visitor function for every node found.  This function may return a value
    which is forwarded by the `visit` method.

    This class is meant to be subclassed, with the subclass adding visitor
    methods.

    Per default the visitor functions for the nodes are ``'visit_'`` +
    class name of the node.  So a `TryFinally` node visit function would
    be `visit_TryFinally`.  This behavior can be changed by overriding
    the `visit` method.  If no visitor function exists for a node
    (return value `None`) the `generic_visit` visitor is used instead.

    Don't use the `NodeVisitor` if you want to apply changes to nodes during
    traversing.  For this a special visitor exists (`NodeTransformer`) that
    allows modifications.
    """

    def visit(self, node):
        """Visit a node."""
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        for field, value in iter_fields(node):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, AST):
                        self.visit(item)
            elif isinstance(value, AST):
                self.visit(value)

    def visit_Constant(self, node):
        value = node.value
        type_name = _const_node_type_names.get(type(value))
        if type_name is None:
            for cls, name in _const_node_type_names.items():
                if isinstance(value, cls):
                    type_name = name
                    break
        if type_name is not None:
            method = 'visit_' + type_name
            try:
                visitor = getattr(self, method)
            except AttributeError:
                pass
            else:
                import warnings
                warnings.warn(f"{method} is deprecated; add visit_Constant",
                              PendingDeprecationWarning, 2)
                return visitor(node)
        return self.generic_visit(node)
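
# Example usage (editorial sketch): count function definitions by subclassing
# NodeVisitor and letting generic_visit() handle the recursion.
import ast

class FunctionCounter(ast.NodeVisitor):
    def __init__(self):
        self.count = 0

    def visit_FunctionDef(self, node):
        self.count += 1
        self.generic_visit(node)                       # keep walking into nested defs

counter = FunctionCounter()
counter.visit(ast.parse("def f():\n    def g(): pass\n"))
print(counter.count)                                   # -> 2
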
class NodeTransformer(NodeVisitor):
    """
    A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
    allows modification of nodes.

    The `NodeTransformer` will walk the AST and use the return value of the
    visitor methods to replace or remove the old node.  If the return value of
    the visitor method is ``None``, the node will be removed from its location,
    otherwise it is replaced with the return value.  The return value may be the
    original node in which case no replacement takes place.

    Here is an example transformer that rewrites all occurrences of name lookups
    (``foo``) to ``data['foo']``::

       class RewriteName(NodeTransformer):

           def visit_Name(self, node):
               return copy_location(Subscript(
                   value=Name(id='data', ctx=Load()),
                   slice=Index(value=Str(s=node.id)),
                   ctx=node.ctx
               ), node)

    Keep in mind that if the node you're operating on has child nodes you must
    either transform the child nodes yourself or call the :meth:`generic_visit`
    method for the node first.

    For nodes that were part of a collection of statements (that applies to all
    statement nodes), the visitor may also return a list of nodes rather than
    just a single node.

    Usually you use the transformer like this::

       node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        for field, old_value in iter_fields(node):
            if isinstance(old_value, list):
                new_values = []
                for value in old_value:
                    if isinstance(value, AST):
                        value = self.visit(value)
                        if value is None:
                            continue
                        elif not isinstance(value, AST):
                            new_values.extend(value)
                            continue
                    new_values.append(value)
                old_value[:] = new_values
            elif isinstance(old_value, AST):
                new_node = self.visit(old_value)
                if new_node is None:
                    delattr(node, field)
                else:
                    setattr(node, field, new_node)
        return node
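
# Illustrative sketch (not module code): double every integer literal, then
# run the rewritten module.
import ast

class DoubleInts(ast.NodeTransformer):
    def visit_Constant(self, node):
        if isinstance(node.value, int) and not isinstance(node.value, bool):
            return ast.copy_location(ast.Constant(value=node.value * 2), node)
        return node

tree = DoubleInts().visit(ast.parse("answer = 21"))
ast.fix_missing_locations(tree)
namespace = {}
exec(compile(tree, "<ast>", "exec"), namespace)
print(namespace["answer"])                             # -> 42
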
# The following code is for backward compatibility.
# It will be removed in future.
def _getter(self):
    return self.value
def _setter(self, value):
    self.value = value
Constant.n = property(_getter, _setter)
Constant.s = property(_getter, _setter)
class _ABC(type):
    def __instancecheck__(cls, inst):
        if not isinstance(inst, Constant):
            return False
        if cls in _const_types and hasattr(inst, 'value'):
            return (isinstance(inst.value, _const_types[cls]) and
                    not isinstance(inst.value, _const_types_not.get(cls, ())))
        return type.__instancecheck__(cls, inst)
def _new(cls, *args, **kwargs):
    for key in kwargs:
        if key not in cls._fields:
            continue  # arbitrary keyword arguments are accepted
        pos = cls._fields.index(key)
        if pos < len(args):
            raise TypeError(f"{cls.__name__} got multiple values for argument {key!r}")
    if cls in _const_types:
        return Constant(*args, **kwargs)
    return Constant.__new__(cls, *args, **kwargs)