"""Find modules used by a script, using introspection."""
import dis
import importlib._bootstrap_external
import importlib.machinery
import io
import marshal
import os
import sys
# Numeric opcodes the bytecode scanner looks for, resolved once from dis.opmap.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = (
    dis.opmap[opname]
    for opname in ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL')
)
# Opcodes that bind a name at module scope.
STORE_OPS = (STORE_NAME, STORE_GLOBAL)
EXTENDED_ARG = dis.EXTENDED_ARG
# Modulefinder does a good job of simulating Python's import machinery, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in this
# map for a package, and they will be honored.
# Note that this is a mapping from package names to lists of paths.
def AddPackagePath(packagename, path):
    """Register an extra search path for the package *packagename*.

    The path is appended to the list kept under ``packagename`` in the
    module-level ``packagePathMap``; the list is created on first use.
    """
    packagePathMap.setdefault(packagename, []).append(path)
# This ReplacePackage mechanism allows modulefinder to work around
# situations in which a package injects itself under the name
# of another package into sys.modules at runtime by calling
# ReplacePackage("real_package_name", "faked_package_name")
# before running ModuleFinder.
def ReplacePackage(oldname, newname):
    """Record that package *oldname* should be treated as *newname*.

    The pair is stored in the module-level ``replacePackageMap``; a later
    registration for the same ``oldname`` overwrites the earlier one.
    """
    replacePackageMap[oldname] = newname
def _find_module(name, path=None):
    """An importlib reimplementation of imp.find_module (for our purposes).

    Returns a ``(file, pathname, (suffix, mode, kind))`` triple:

    * built-in and frozen modules: ``(None, None, ("", "", kind))``;
    * packages: ``(None, <package directory>, ("", "", _PKG_DIRECTORY))``;
    * source, extension and sourceless modules: an open binary file, its
      path, and ``(<file suffix>, "rb", kind)``.  The caller owns the file
      and must close it.

    Raises ImportError when PathFinder finds no spec for *name*.
    """
    # It's necessary to clear the caches for our Finder first, in case any
    # modules are being added/deleted/modified at runtime. In particular,
    # test_modulefinder.py changes file tree contents in a cache-breaking way:
    importlib.machinery.PathFinder.invalidate_caches()

    spec = importlib.machinery.PathFinder.find_spec(name, path)
    if spec is None:
        # find_spec() signals "not found" by returning None.
        raise ImportError("No module named {name!r}".format(name=name), name=name)

    # Built-in and frozen modules have no file on disk.
    if spec.loader is importlib.machinery.BuiltinImporter:
        return None, None, ("", "", _C_BUILTIN)
    if spec.loader is importlib.machinery.FrozenImporter:
        return None, None, ("", "", _PY_FROZEN)

    file_path = spec.origin

    if spec.loader.is_package(name):
        return None, os.path.dirname(file_path), ("", "", _PKG_DIRECTORY)

    if isinstance(spec.loader, importlib.machinery.SourceFileLoader):
        kind = _PY_SOURCE
    elif isinstance(spec.loader, importlib.machinery.ExtensionFileLoader):
        kind = _C_EXTENSION
    elif isinstance(spec.loader, importlib.machinery.SourcelessFileLoader):
        kind = _PY_COMPILED
    else:  # Should never happen.
        return None, None, ("", "", _SEARCH_ERROR)

    file = io.open_code(file_path)
    suffix = os.path.splitext(file_path)[-1]
    return file, file_path, (suffix, "rb", kind)
class Module:
    """Record describing one module found during the scan."""

    def __init__(self, name, file=None, path=None):
        """Create a record for *name* with optional file and package path."""
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        # Code object for the module when one has been attached, else None.
        self.__code__ = None
        # The set of global names that are assigned to in the module.
        # This includes those names imported through starimports of
        # Python modules.
        self.globalnames = {}
        # The set of starimports this module did that could not be
        # resolved, ie. a starimport from a non-Python module.
        self.starimports = {}

    def __repr__(self):
        """Return ``Module(name[, file[, path]])``, omitting unset parts."""
        s = "Module(%r" % (self.__name__,)
        if self.__file__ is not None:
            s = s + ", %r" % (self.__file__,)
        if self.__path__ is not None:
            s = s + ", %r" % (self.__path__,)
        return s + ")"
def __init__(self, path=None, debug=0, excludes=None, replace_paths=None):
    """Set up an empty finder.

    path          -- list of directories to search; defaults to ``sys.path``.
    debug         -- verbosity threshold compared against message levels.
    excludes      -- fully qualified module names that must not be located.
    replace_paths -- path replacement entries handed to
                     ``replace_paths_in_code`` when non-empty.
    """
    if path is None:
        path = sys.path
    self.path = path
    self.modules = {}       # fully qualified name -> module record
    self.badmodules = {}    # name that failed to import -> {caller name: 1}
    self.debug = debug
    self.indent = 0         # current nesting depth of debug output
    self.excludes = excludes if excludes is not None else []
    self.replace_paths = replace_paths if replace_paths is not None else []
    self.processed_paths = []   # Used in debugging only
def msg(self, level, str, *args):
    """Print a debug line if *level* does not exceed ``self.debug``.

    The line is prefixed according to ``self.indent``, followed by *str*
    and the repr of every extra argument.
    """
    if level <= self.debug:
        for i in range(self.indent):
            print("   ", end=' ')
        print(str, end=' ')
        for arg in args:
            print(repr(arg), end=' ')
        print()

def msgin(self, *args):
    """Like msg(), but first increase the indentation by one step."""
    level = args[0]
    if level <= self.debug:
        self.indent = self.indent + 1
        self.msg(*args)

def msgout(self, *args):
    """Like msg(), but first decrease the indentation by one step."""
    level = args[0]
    if level <= self.debug:
        self.indent = self.indent - 1
        self.msg(*args)
def run_script(self, pathname):
    """Load the source file *pathname* as the ``__main__`` module.

    The file is opened with io.open_code() and closed again once
    load_module() has returned.
    """
    self.msg(2, "run_script", pathname)
    with io.open_code(pathname) as fp:
        stuff = ("", "rb", _PY_SOURCE)
        self.load_module('__main__', fp, pathname, stuff)
def load_file(self, pathname):
    """Load the source file *pathname* under its base name.

    The module name is the file name without directory and extension; the
    extension is passed on as the suffix of the file-info triple.
    """
    name, ext = os.path.splitext(os.path.basename(pathname))
    with io.open_code(pathname) as fp:
        stuff = (ext, "rb", _PY_SOURCE)
        self.load_module(name, fp, pathname, stuff)
def import_hook(self, name, caller=None, fromlist=None, level=-1):
    """Process ``import name`` / ``from name import fromlist`` for *caller*.

    Returns the head package when *fromlist* is empty or None.  Otherwise
    the names in *fromlist* are resolved against the tail module (only if
    it is a package, i.e. has a ``__path__``) and None is returned.
    Raises ImportError if any part of *name* cannot be found.
    """
    self.msg(3, "import_hook", name, caller, fromlist, level)
    parent = self.determine_parent(caller, level=level)
    q, tail = self.find_head_package(parent, name)
    m = self.load_tail(q, tail)
    if not fromlist:
        # Plain "import a.b.c" binds the head package "a".
        return q
    if m.__path__:
        self.ensure_fromlist(m, fromlist)
    return None
# Work out which already-loaded module an import made by `caller` is
# relative to, reporting each outcome through self.msgout().
#
# NOTE(review): this block looks truncated in this copy of the file:
#   * `pname` is read below but never assigned in the visible lines --
#     presumably the caller's dotted name (caller.__name__); TODO confirm.
#   * no `return` statement is visible -- presumably each msgout() call is
#     followed by a return of the value it reports; TODO confirm.
#   * the conditions that choose between the repeated
#     `parent = self.modules[pname]` groups are not visible.
# Restore the missing lines before relying on this method.
def determine_parent(self, caller, level=-1):
self.msgin(4, "determine_parent", caller, level)
# No caller, or level == 0: reported as "-> None".
if not caller or level == 0:
self.msgout(4, "determine_parent -> None")
if level >= 1: # relative import
parent = self.modules[pname]
self.msgout(4, "determine_parent ->", parent)
# The caller's dotted name must have at least `level` dots, otherwise the
# relative import would climb above the top-level package.
if pname.count(".") < level:
raise ImportError("relative importpath too deep")
# Drop the last `level` components of the dotted name.
pname = ".".join(pname.split(".")[:-level])
parent = self.modules[pname]
self.msgout(4, "determine_parent ->", parent)
parent = self.modules[pname]
self.msgout(4, "determine_parent ->", parent)
parent = self.modules[pname]
# Sanity check: the record stored under `pname` carries that same name.
assert parent.__name__ == pname
self.msgout(4, "determine_parent ->", parent)
self.msgout(4, "determine_parent -> None")
def find_head_package(self, parent, name):
    """Import the first component of the dotted *name*.

    The head is first looked up as a submodule of *parent* (when given)
    and then, if that fails, as a top-level module.  Returns
    ``(module, tail)`` where *tail* is the rest of *name* after the first
    dot ("" if there is none).  Raises ImportError if neither lookup
    succeeds.
    """
    self.msgin(4, "find_head_package", parent, name)
    head, _, tail = name.partition('.')
    if parent:
        qname = "%s.%s" % (parent.__name__, head)
    else:
        qname = head
    q = self.import_module(head, qname, parent)
    if q:
        self.msgout(4, "find_head_package ->", (q, tail))
        return q, tail
    if parent:
        # Not a submodule of the parent: retry as a top-level module.
        qname = head
        parent = None
        q = self.import_module(head, qname, parent)
        if q:
            self.msgout(4, "find_head_package ->", (q, tail))
            return q, tail
    self.msgout(4, "raise ImportError: No module named", qname)
    raise ImportError("No module named " + qname)
def load_tail(self, q, tail):
    """Import each remaining component of the dotted *tail* below *q*.

    Returns the module for the last component, or *q* itself when *tail*
    is empty.  Raises ImportError at the first component that cannot be
    imported.
    """
    self.msgin(4, "load_tail", q, tail)
    m = q
    while tail:
        head, _, tail = tail.partition('.')
        mname = "%s.%s" % (m.__name__, head)
        m = self.import_module(head, mname, m)
        if not m:
            self.msgout(4, "raise ImportError: No module named", mname)
            raise ImportError("No module named " + mname)
    self.msgout(4, "load_tail ->", m)
    return m
def ensure_fromlist(self, m, fromlist, recursive=0):
    """Import every name in *fromlist* that is a submodule of package *m*.

    A ``"*"`` entry expands to all submodules found on disk by
    find_all_submodules(); *recursive* is set on that nested call so a
    ``"*"`` in the expanded list is not expanded again.  Names that are
    already attributes of *m* are skipped.  Raises ImportError when a name
    cannot be imported.
    """
    self.msg(4, "ensure_fromlist", m, fromlist, recursive)
    for sub in fromlist:
        if sub == "*":
            if not recursive:
                submodules = self.find_all_submodules(m)
                if submodules:
                    self.ensure_fromlist(m, submodules, 1)
        elif not hasattr(m, sub):
            subname = "%s.%s" % (m.__name__, sub)
            submod = self.import_module(sub, subname, m)
            if not submod:
                raise ImportError("No module named " + subname)
def find_all_submodules(self, m):
    """Return the names of all submodules found in the directories of *m*.

    Every entry of ``m.__path__`` is listed and each file whose name ends
    in a known extension, source or bytecode suffix contributes its stem;
    ``__init__`` is ignored.  Directories that cannot be listed are
    reported via msg() and skipped.  Returns None when *m* has no
    ``__path__``, otherwise a view of the unique names found.
    """
    if not m.__path__:
        return None
    modules = {}
    # 'suffixes' used to be a list hardcoded to [".py", ".pyc"].
    # But we must also collect Python extension modules - although
    # we cannot separate normal dlls from Python extensions.
    suffixes = []
    suffixes += importlib.machinery.EXTENSION_SUFFIXES[:]
    suffixes += importlib.machinery.SOURCE_SUFFIXES[:]
    suffixes += importlib.machinery.BYTECODE_SUFFIXES[:]
    for dir in m.__path__:
        try:
            names = os.listdir(dir)
        except OSError:
            self.msg(2, "can't list directory", dir)
            continue
        for name in names:
            mod = None
            # The first matching suffix wins.
            for suff in suffixes:
                if name.endswith(suff):
                    mod = name[:-len(suff)]
                    break
            if mod and mod != "__init__":
                modules[mod] = mod
    return modules.keys()
def import_module(self, partname, fqname, parent):
    """Return the module *fqname*, loading it if necessary.

    partname -- last component of the name, searched on the parent's path.
    fqname   -- fully qualified name used as the key in ``self.modules``.
    parent   -- parent package record, or None for a top-level module.

    Returns None when the module is already known to be bad, when the
    parent is not a package, or when it cannot be located.  A successfully
    loaded module is also set as attribute *partname* on *parent*.
    """
    self.msgin(3, "import_module", partname, fqname, parent)
    try:
        m = self.modules[fqname]
    except KeyError:
        pass
    else:
        self.msgout(3, "import_module ->", m)
        return m
    if fqname in self.badmodules:
        self.msgout(3, "import_module -> None")
        return None
    if parent and parent.__path__ is None:
        # The parent is a plain module, so it cannot have submodules.
        self.msgout(3, "import_module -> None")
        return None
    try:
        fp, pathname, stuff = self.find_module(partname,
                                               parent and parent.__path__, parent)
    except ImportError:
        self.msgout(3, "import_module ->", None)
        return None

    try:
        m = self.load_module(fqname, fp, pathname, stuff)
    finally:
        # find_module() hands over an open file for file-based modules.
        if fp:
            fp.close()
    if parent:
        setattr(parent, partname, m)
    self.msgout(3, "import_module ->", m)
    return m
def load_module(self, fqname, fp, pathname, file_info):
    """Create the record for *fqname* and scan its code for imports.

    file_info is the ``(suffix, mode, kind)`` triple describing the file.
    Packages are delegated to load_package().  Source files are compiled,
    bytecode files are validated and unmarshalled; any other kind gets a
    record without code.  Returns the module record.  Raises ImportError
    for an invalid bytecode file (and whatever compile() raises for bad
    source).
    """
    suffix, mode, kind = file_info
    self.msgin(2, "load_module", fqname, fp and "fp", pathname)
    if kind == _PKG_DIRECTORY:
        m = self.load_package(fqname, pathname)
        self.msgout(2, "load_module ->", m)
        return m
    if kind == _PY_SOURCE:
        co = compile(fp.read(), pathname, 'exec')
    elif kind == _PY_COMPILED:
        try:
            data = fp.read()
            importlib._bootstrap_external._classify_pyc(data, fqname, {})
        except ImportError as exc:
            self.msgout(2, "raise ImportError: " + str(exc), pathname)
            raise
        # The marshalled code object follows the 16-byte pyc header.
        co = marshal.loads(memoryview(data)[16:])
    else:
        co = None
    m = self.add_module(fqname)
    m.__file__ = pathname
    if co:
        if self.replace_paths:
            co = self.replace_paths_in_code(co)
        m.__code__ = co
        self.scan_code(co, m)
    self.msgout(2, "load_module ->", m)
    return m
def _add_badmodule(self, name, caller):
    """Record that *name* could not be imported by *caller*.

    ``self.badmodules[name]`` maps the importing module's name to 1; the
    key "-" is used when there is no caller.
    """
    importers = self.badmodules.setdefault(name, {})
    if caller:
        importers[caller.__name__] = 1
    else:
        importers["-"] = 1
def _safe_import_hook(self, name, caller, fromlist, level=-1):
    """Wrapper for self.import_hook() that won't raise ImportError.

    Failures (ImportError or SyntaxError) are recorded with
    _add_badmodule() instead of propagating.  When the module itself
    imports, each entry of *fromlist* is tried separately so one missing
    name does not hide the others; failures are recorded as
    ``name.sub``.
    """
    if name in self.badmodules:
        self._add_badmodule(name, caller)
        return
    try:
        self.import_hook(name, caller, level=level)
    except ImportError as msg:
        self.msg(2, "ImportError:", str(msg))
        self._add_badmodule(name, caller)
    except SyntaxError as msg:
        self.msg(2, "SyntaxError:", str(msg))
        self._add_badmodule(name, caller)
    else:
        if fromlist:
            for sub in fromlist:
                fullname = name + "." + sub
                if fullname in self.badmodules:
                    self._add_badmodule(fullname, caller)
                    continue
                try:
                    self.import_hook(name, caller, [sub], level=level)
                except ImportError as msg:
                    self.msg(2, "ImportError:", str(msg))
                    self._add_badmodule(fullname, caller)
def scan_opcodes(self, co):
    """Scan the code object *co* and yield 'interesting' opcode combinations.

    Yields, in bytecode order:

    * ``("store", (name,))`` for every STORE_NAME / STORE_GLOBAL;
    * ``("absolute_import", (fromlist, name))`` or
      ``("relative_import", (level, fromlist, name))`` for every
      IMPORT_NAME directly preceded by two LOAD_CONST instructions (the
      import level and the fromlist).
    """
    code = co.co_code
    names = co.co_names
    consts = co.co_consts
    # EXTENDED_ARG prefixes are dropped so the two LOAD_CONSTs stay adjacent
    # to their IMPORT_NAME.
    opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code)
              if op != EXTENDED_ARG]
    for i, (op, oparg) in enumerate(opargs):
        if op in STORE_OPS:
            yield "store", (names[oparg],)
            continue
        if (op == IMPORT_NAME and i >= 2
                and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST):
            level = consts[opargs[i-2][1]]
            fromlist = consts[opargs[i-1][1]]
            if level == 0:  # absolute import
                yield "absolute_import", (fromlist, names[oparg])
            else:  # relative import
                yield "relative_import", (level, fromlist, names[oparg])
            continue
# Walk the events produced by self.scan_opcodes(co) and apply them to the
# module record `m`: imports are followed through self._safe_import_hook()
# and the names gathered by star-imported modules are merged into `m`.
#
# NOTE(review): this block looks truncated in this copy of the file:
#   * the leading `if` of the `what` dispatch chain is not visible, nor is
#     the unpacking of `args` for "absolute_import" (`fromlist` and `name`
#     are read without a visible assignment);
#   * the conditions guarding the star-import handling, the two
#     `mm = self.modules.get(...)` lookups and the two relative-import
#     calls are not visible;
#   * the loop that binds `c` and the body of the final `if` are not
#     visible -- presumably nested code objects among the constants are
#     scanned too; TODO confirm.
# Restore the missing lines before relying on this method.
def scan_code(self, co, m):
scanner = self.scan_opcodes
for what, args in scanner(co):
elif what == "absolute_import":
# "*" is not a real name to import, so it is filtered out of the fromlist.
fromlist = [f for f in fromlist if f != "*"]
self._safe_import_hook(name, m, fromlist, level=0)
# We've encountered an "import *". If the target is a Python module, its
# code has already been scanned, so its collected names can be merged
# into `m` (see the two update() calls below).
# At this point we don't know whether 'name' is a
# submodule of 'm' or a global module, so both lookups appear here.
mm = self.modules.get(m.__name__ + "." + name)
mm = self.modules.get(name)
m.globalnames.update(mm.globalnames)
m.starimports.update(mm.starimports)
elif what == "relative_import":
level, fromlist, name = args
self._safe_import_hook(name, m, fromlist, level=level)
# Second form: resolve the parent package and import `fromlist` from it as
# an absolute import of the parent's name.
parent = self.determine_parent(m, level=level)
self._safe_import_hook(parent.__name__, None, fromlist, level=0)
# We don't expect anything else from the generator.
# Test whether `c` is itself a code object (same type as `co`).
if isinstance(c, type(co)):
def load_package(self, fqname, pathname):
    """Create the record for package *fqname* and load its ``__init__``.

    A replacement name registered in ``replacePackageMap`` takes the place
    of *fqname*.  The package's ``__path__`` starts as ``[pathname]`` and
    is extended with any paths registered in ``packagePathMap``.  Returns
    the package's module record; the file returned by find_module() is
    always closed.
    """
    self.msgin(2, "load_package", fqname, pathname)
    newname = replacePackageMap.get(fqname)
    if newname:
        fqname = newname
    m = self.add_module(fqname)
    m.__file__ = pathname
    m.__path__ = [pathname]

    # As per comment at top of file, simulate runtime __path__ additions.
    m.__path__ = m.__path__ + packagePathMap.get(fqname, [])

    fp, buf, stuff = self.find_module("__init__", m.__path__)
    try:
        self.load_module(fqname, fp, buf, stuff)
        self.msgout(2, "load_package ->", m)
        return m
    finally:
        if fp:
            fp.close()
def add_module(self, fqname):
    """Return the record for *fqname*, creating and registering it if new."""
    if fqname in self.modules:
        return self.modules[fqname]
    self.modules[fqname] = m = Module(fqname)
    return m
def find_module(self, name, path, parent=None):
# assert path is not None
fullname = parent.__name__+'.'+name
if fullname in self.excludes:
self.msgout(3, "find_module -> Excluded", fullname)