namespace. Each `DocTest` defines the following attributes:
- examples: the list of examples.
- globs: The namespace (aka globals) that the examples should
- name: A name identifying the DocTest (typically, the name of
the object whose docstring this DocTest was extracted from).
- filename: The name of the file that this DocTest was extracted
from, or `None` if the filename is unknown.
- lineno: The line number within filename where this DocTest
begins, or `None` if the line number is unavailable. This
line number is zero-based, with respect to the beginning of
- docstring: The string that the examples were extracted from,
or `None` if the string is unavailable.
def __init__(self, examples, globs, name, filename, lineno, docstring):
Create a new DocTest containing the given examples. The
DocTest's globals are initialized with a copy of `globs`.
assert not isinstance(examples, str), \
"DocTest no longer accepts str; use DocTestParser instead"
self.docstring = docstring
self.globs = globs.copy()
if len(self.examples) == 0:
elif len(self.examples) == 1:
examples = '%d examples' % len(self.examples)
return ('<%s %s from %s:%s (%s)>' %
(self.__class__.__name__,
self.name, self.filename, self.lineno, examples))
if type(self) is not type(other):
return self.examples == other.examples and \
self.docstring == other.docstring and \
self.globs == other.globs and \
self.name == other.name and \
self.filename == other.filename and \
self.lineno == other.lineno
return hash((self.docstring, self.name, self.filename, self.lineno))
# This lets us sort tests by name:
if not isinstance(other, DocTest):
return ((self.name, self.filename, self.lineno, id(self))
(other.name, other.filename, other.lineno, id(other)))
######################################################################
######################################################################
A class used to parse strings containing doctest examples.
# This regular expression is used to find doctest examples in a
# string. It defines three groups: `source` is the source code
# (including leading indentation and prompts); `indent` is the
# indentation of the first (PS1) line of the source code; and
# `want` is the expected output (including leading indentation).
_EXAMPLE_RE = re.compile(r'''
# Source consists of a PS1 line followed by zero or more PS2 lines.
(?:^(?P<indent> [ ]*) >>> .*) # PS1 line
(?:\n [ ]* \.\.\. .*)*) # PS2 lines
# Want consists of any non-blank lines that do not start with PS1.
(?P<want> (?:(?![ ]*$) # Not a blank line
(?![ ]*>>>) # Not a line starting with PS1
.+$\n? # But any other line
''', re.MULTILINE | re.VERBOSE)
# A regular expression for handling `want` strings that contain
# expected exceptions. It divides `want` into three pieces:
# - the traceback header line (`hdr`)
# - the traceback stack (`stack`)
# - the exception message (`msg`), as generated by
# traceback.format_exception_only()
# `msg` may have multiple lines. We assume/require that the
# exception message is the first non-indented line starting with a word
# character following the traceback header line.
_EXCEPTION_RE = re.compile(r"""
# Grab the traceback header. Different versions of Python have
# said different things on the first traceback line.
(?: most\ recent\ call\ last
\s* $ # toss trailing whitespace on the header.
(?P<stack> .*?) # don't blink: absorb stuff until...
^ (?P<msg> \w+ .*) # a line *starts* with alphanum.
""", re.VERBOSE | re.MULTILINE | re.DOTALL)
# A callable returning a true value iff its argument is a blank line
# or contains a single comment.
_IS_BLANK_OR_COMMENT = re.compile(r'^[ ]*(#.*)?$').match
def parse(self, string, name='<string>'):
Divide the given string into examples and intervening text,
and return them as a list of alternating Examples and strings.
Line numbers for the Examples are 0-based. The optional
argument `name` is a name identifying this string, and is only
string = string.expandtabs()
# If all lines begin with the same indentation, then strip it.
min_indent = self._min_indent(string)
string = '\n'.join([l[min_indent:] for l in string.split('\n')])
# Find all doctest examples in the string:
for m in self._EXAMPLE_RE.finditer(string):
# Add the pre-example text to `output`.
output.append(string[charno:m.start()])
# Update lineno (lines before this example)
lineno += string.count('\n', charno, m.start())
# Extract info from the regexp match.
(source, options, want, exc_msg) = \
self._parse_example(m, name, lineno)
# Create an Example, and add it to the list.
if not self._IS_BLANK_OR_COMMENT(source):
output.append( Example(source, want, exc_msg,
indent=min_indent+len(m.group('indent')),
# Update lineno (lines inside this example)
lineno += string.count('\n', m.start(), m.end())
# Add any remaining post-example text to `output`.
output.append(string[charno:])
def get_doctest(self, string, globs, name, filename, lineno):
Extract all doctest examples from the given string, and
collect them into a `DocTest` object.
`globs`, `name`, `filename`, and `lineno` are attributes for
the new `DocTest` object. See the documentation for `DocTest`
return DocTest(self.get_examples(string, name), globs,
name, filename, lineno, string)
def get_examples(self, string, name='<string>'):
Extract all doctest examples from the given string, and return
them as a list of `Example` objects. Line numbers are
0-based, because it's most common in doctests that nothing
interesting appears on the same line as opening triple-quote,
and so the first interesting line is called \"line 1\" then.
The optional argument `name` is a name identifying this
string, and is only used for error messages.
return [x for x in self.parse(string, name)
if isinstance(x, Example)]
def _parse_example(self, m, name, lineno):
Given a regular expression match from `_EXAMPLE_RE` (`m`),
return a pair `(source, want)`, where `source` is the matched
example's source code (with prompts and indentation stripped);
and `want` is the example's expected output (with indentation
`name` is the string's name, and `lineno` is the line number
where the example starts; both are used for error messages.
# Get the example's indentation level.
indent = len(m.group('indent'))
# Divide source into lines; check that they're properly
# indented; and then strip their indentation & prompts.
source_lines = m.group('source').split('\n')
self._check_prompt_blank(source_lines, indent, name, lineno)
self._check_prefix(source_lines[1:], ' '*indent + '.', name, lineno)
source = '\n'.join([sl[indent+4:] for sl in source_lines])
# Divide want into lines; check that it's properly indented; and
# then strip the indentation. Spaces before the last newline should
# be preserved, so plain rstrip() isn't good enough.
want_lines = want.split('\n')
if len(want_lines) > 1 and re.match(r' *$', want_lines[-1]):
del want_lines[-1] # forget final newline & spaces after it
self._check_prefix(want_lines, ' '*indent, name,
lineno + len(source_lines))
want = '\n'.join([wl[indent:] for wl in want_lines])
# If `want` contains a traceback message, then extract it.
m = self._EXCEPTION_RE.match(want)
# Extract options from the source.
options = self._find_options(source, name, lineno)
return source, options, want, exc_msg
# This regular expression looks for option directives in the
# source code of an example. Option directives are comments
# starting with "doctest:". Warning: this may give false
# positives for string-literals that contain the string
# "#doctest:". Eliminating these false positives would require
# actually parsing the string; but we limit them by ignoring any
# line containing "#doctest:" that is *followed* by a quote mark.
_OPTION_DIRECTIVE_RE = re.compile(r'#\s*doctest:\s*([^\n\'"]*)$',
def _find_options(self, source, name, lineno):
Return a dictionary containing option overrides extracted from
option directives in the given source string.
`name` is the string's name, and `lineno` is the line number
where the example starts; both are used for error messages.
# (note: with the current regexp, this will match at most once:)
for m in self._OPTION_DIRECTIVE_RE.finditer(source):
option_strings = m.group(1).replace(',', ' ').split()
for option in option_strings:
if (option[0] not in '+-' or
option[1:] not in OPTIONFLAGS_BY_NAME):
raise ValueError('line %r of the doctest for %s '
'has an invalid option: %r' %
(lineno+1, name, option))
flag = OPTIONFLAGS_BY_NAME[option[1:]]
options[flag] = (option[0] == '+')
if options and self._IS_BLANK_OR_COMMENT(source):
raise ValueError('line %r of the doctest for %s has an option '
'directive on a line with no example: %r' %
# This regular expression finds the indentation of every non-blank
_INDENT_RE = re.compile(r'^([ ]*)(?=\S)', re.MULTILINE)
def _min_indent(self, s):
"Return the minimum indentation of any non-blank line in `s`"
indents = [len(indent) for indent in self._INDENT_RE.findall(s)]
def _check_prompt_blank(self, lines, indent, name, lineno):
Given the lines of a source string (including prompts and
leading indentation), check to make sure that every prompt is
followed by a space character. If any line is not followed by
a space character, then raise ValueError.
for i, line in enumerate(lines):
if len(line) >= indent+4 and line[indent+3] != ' ':
raise ValueError('line %r of the docstring for %s '
'lacks blank after %s: %r' %
line[indent:indent+3], line))
def _check_prefix(self, lines, prefix, name, lineno):
Check that every line in the given list starts with the given
prefix; if any line does not, then raise a ValueError.
for i, line in enumerate(lines):
if line and not line.startswith(prefix):
raise ValueError('line %r of the docstring for %s has '
'inconsistent leading whitespace: %r' %
(lineno+i+1, name, line))
######################################################################
######################################################################
A class used to extract the DocTests that are relevant to a given
object, from its docstring and the docstrings of its contained
objects. Doctests can currently be extracted from the following
object types: modules, functions, classes, methods, staticmethods,
classmethods, and properties.
def __init__(self, verbose=False, parser=DocTestParser(),
recurse=True, exclude_empty=True):
Create a new doctest finder.
The optional argument `parser` specifies a class or
function that should be used to create new DocTest objects (or
objects that implement the same interface as DocTest). The
signature for this factory function should match the signature
of the DocTest constructor.
If the optional argument `recurse` is false, then `find` will
only examine the given object, and not any contained objects.
If the optional argument `exclude_empty` is false, then `find`
will include tests for objects with empty docstrings.
self._exclude_empty = exclude_empty
def find(self, obj, name=None, module=None, globs=None, extraglobs=None):
Return a list of the DocTests that are defined by the given
object's docstring, or by any of its contained objects'
The optional parameter `module` is the module that contains
the given object. If the module is not specified or is None, then
the test finder will attempt to automatically determine the
correct module. The object's module is used:
- As a default namespace, if `globs` is not specified.
- To prevent the DocTestFinder from extracting DocTests
from objects that are imported from other modules.
- To find the name of the file containing the object.
- To help find the line number of the object within its
Contained objects whose module does not match `module` are ignored.
If `module` is False, no attempt to find the module will be made.
This is obscure, of use mostly in tests: if `module` is False, or
is None but cannot be found automatically, then all objects are
considered to belong to the (non-existent) module, so all contained
objects will (recursively) be searched for doctests.
The globals for each DocTest is formed by combining `globs`
and `extraglobs` (bindings in `extraglobs` override bindings
in `globs`). A new copy of the globals dictionary is created
for each DocTest. If `globs` is not specified, then it
defaults to the module's `__dict__`, if specified, or {}
otherwise. If `extraglobs` is not specified, then it defaults
# If name was not specified, then extract it from the object.
name = getattr(obj, '__name__', None)
raise ValueError("DocTestFinder.find: name must be given "
"when obj.__name__ doesn't exist: %r" %
# Find the module that contains the given object (if obj is
# a module, then module=obj.). Note: this may fail, in which
# case module will be None.
module = inspect.getmodule(obj)
# Read the module's source code. This is used by
# DocTestFinder._find_lineno to find the line number for a
# given object's docstring.
file = inspect.getsourcefile(obj)
# Check to see if it's one of our special internal "files"
# (see __patched_linecache_getlines).
file = inspect.getfile(obj)
if not file[0]+file[-2:] == '<]>': file = None
# Supply the module globals in case the module was
# originally loaded via a PEP 302 loader and
# file is not a valid filesystem path
source_lines = linecache.getlines(file, module.__dict__)
# No access to a loader, so assume it's a normal
source_lines = linecache.getlines(file)
# Initialize globals, and merge in extraglobs.
globs = module.__dict__.copy()
if extraglobs is not None:
if '__name__' not in globs:
globs['__name__'] = '__main__' # provide a default module name
# Recursively explore `obj`, extracting DocTests.
self._find(tests, obj, name, module, source_lines, globs, {})
# Sort the tests by alpha order of names, for consistency in
# verbose-mode output. This was a feature of doctest in Pythons
# <= 2.3 that got lost by accident in 2.4. It was repaired in
def _from_module(self, module, object):
Return true if the given object is defined in the given
elif inspect.getmodule(object) is not None:
return module is inspect.getmodule(object)
elif inspect.isfunction(object):
return module.__dict__ is object.__globals__
elif inspect.ismethoddescriptor(object):
if hasattr(object, '__objclass__'):
obj_mod = object.__objclass__.__module__
elif hasattr(object, '__module__'):
obj_mod = object.__module__
return True # [XX] no easy way to tell otherwise
return module.__name__ == obj_mod
elif inspect.isclass(object):
return module.__name__ == object.__module__
elif hasattr(object, '__module__'):
return module.__name__ == object.__module__
elif isinstance(object, property):
return True # [XX] no way not be sure.
raise ValueError("object must be a class or function")
def _find(self, tests, obj, name, module, source_lines, globs, seen):
Find tests for the given object and any contained objects, and
print('Finding tests in %s' % name)
# If we've already processed this object, then ignore it.
# Find a test for this object, and add it to the list of tests.
test = self._get_test(obj, name, module, globs, source_lines)
# Look for tests in a module's contained objects.
if inspect.ismodule(obj) and self._recurse:
for valname, val in obj.__dict__.items():
valname = '%s.%s' % (name, valname)
# Recurse to functions & classes.
if ((inspect.isroutine(inspect.unwrap(val))
or inspect.isclass(val)) and
self._from_module(module, val)):
self._find(tests, val, valname, module, source_lines,
# Look for tests in a module's __test__ dictionary.
if inspect.ismodule(obj) and self._recurse:
for valname, val in getattr(obj, '__test__', {}).items():