"""Utility functions for copying and archiving files and directory trees.

XXX The functions here don't copy the resource fork or other metadata on Mac.
"""
from os.path import abspath
# Public API of this module: the names exported by a star-import.
# ignore_patterns() is listed because it is the documented helper for the
# `ignore` argument of copytree().
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
           "copytree", "move", "rmtree", "Error", "SpecialFileError",
           "ExecError", "make_archive", "get_archive_formats",
           "register_archive_format", "unregister_archive_format",
           "ignore_patterns"]
class Error(EnvironmentError):
    """Raised when a copy or move operation cannot be completed.

    Within this module it is raised, for example, when source and
    destination are the same file, and by copytree() with a list of the
    failures collected while copying.
    """
class SpecialFileError(EnvironmentError):
    """Raised when trying to do a kind of operation (e.g. copying) which is
    not supported on a special file (e.g. a named pipe)"""
class ExecError(EnvironmentError):
    """Raised when a command could not be executed"""
def copyfileobj(fsrc, fdst, length=16*1024):
    """Copy data from file-like object fsrc to file-like object fdst.

    The data is transferred in chunks of at most `length` (as passed to
    `fsrc.read()`), so the whole source never has to be held in memory at
    once.  Copying starts at the current position of `fsrc` and stops as
    soon as `fsrc.read()` returns an empty result.
    """
    while True:
        buf = fsrc.read(length)
        if not buf:
            # An empty read signals end of input.
            break
        fdst.write(buf)
# NOTE(review): the `def` line of this helper is not visible in SOURCE; the
# surviving body compares two paths named `src` and `dst`.
# os.path.samefile() is not provided on every platform, hence the hasattr test.
if hasattr(os.path, 'samefile'):
return os.path.samefile(src, dst)
# All other platforms: check for same pathname.
# Both paths are made absolute and case-normalised before being compared.
return (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
# NOTE(review): the `def` line is not visible in SOURCE; the docstring and
# __all__ suggest this is copyfile(src, dst) -- confirm.  The conditions that
# guard the two raises and the statement inside the `with` blocks also appear
# to be missing.
"""Copy data from src to dst"""
raise Error("`%s` and `%s` are the same file" % (src, dst))
# File most likely does not exist
# XXX What about other special files? (sockets, devices...)
# Named pipes (FIFOs) are rejected with SpecialFileError.
if stat.S_ISFIFO(st.st_mode):
raise SpecialFileError("`%s` is a named pipe" % fn)
# Source and destination are both opened in binary mode.
with open(src, 'rb') as fsrc:
with open(dst, 'wb') as fdst:
# NOTE(review): the `def` line and the statements that produce `st` and apply
# `mode` are not visible in SOURCE; the docstring and __all__ suggest this is
# copymode(src, dst) -- confirm.
"""Copy mode bits from src to dst"""
# stat.S_IMODE() keeps only the permission bits of st_mode.
mode = stat.S_IMODE(st.st_mode)
# NOTE(review): the `def` line, the statement that produces `st`, and the
# try/except that binds `why` are not visible in SOURCE; the docstring and
# __all__ suggest this is copystat(src, dst) -- confirm.
"""Copy all stat info (mode bits, atime, mtime, flags) from src to dst"""
# stat.S_IMODE() keeps only the permission bits of st_mode.
mode = stat.S_IMODE(st.st_mode)
# Carry over the access and modification times.
os.utime(dst, (st.st_atime, st.st_mtime))
# File flags are only copied when both os.chflags and st_flags exist.
if hasattr(os, 'chflags') and hasattr(st, 'st_flags'):
os.chflags(dst, st.st_flags)
# The errno names are looked up with hasattr/getattr because a platform may
# not define both of them.
for err in 'EOPNOTSUPP', 'ENOTSUP':
if hasattr(errno, err) and why.errno == getattr(errno, err):
# NOTE(review): the `def` line, the end of the docstring and the statements
# around the join below are not visible in SOURCE; the docstring and __all__
# suggest this is copy(src, dst) -- confirm.
"""Copy data and mode bits ("cp src dst").
The destination may be a directory.
# Build the target path from the basename of `src` placed under `dst`
# (presumably only when `dst` is a directory; the guard is not visible).
dst = os.path.join(dst, os.path.basename(src))
# NOTE(review): the `def` line, the end of the docstring and the statements
# around the join below are not visible in SOURCE; the docstring and __all__
# suggest this is copy2(src, dst) -- confirm.
"""Copy data and all stat info ("cp -p src dst").
The destination may be a directory.
# Build the target path from the basename of `src` placed under `dst`
# (presumably only when `dst` is a directory; the guard is not visible).
dst = os.path.join(dst, os.path.basename(src))
def ignore_patterns(*patterns):
    """Function that can be used as copytree() ignore parameter.

    Patterns is a sequence of glob-style patterns
    that are used to exclude files.

    Returns a callable taking (path, names) which returns the set of
    entries of `names` matching at least one of the patterns; `path` itself
    is not inspected.
    """
    def _ignore_patterns(path, names):
        ignored_names = []
        for pattern in patterns:
            # fnmatch.filter() returns the subset of names matching pattern.
            ignored_names.extend(fnmatch.filter(names, pattern))
        # A set removes names that were matched by more than one pattern.
        return set(ignored_names)
    return _ignore_patterns
def copytree(src, dst, symlinks=False, ignore=None):
    """Recursively copy a directory tree using copy2().

    The destination directory must not already exist.
    If exception(s) occur, an Error is raised with a list of reasons.

    If the optional symlinks flag is true, symbolic links in the
    source tree result in symbolic links in the destination tree; if
    it is false, the contents of the files pointed to by symbolic
    links are copied.

    The optional ignore argument is a callable. If given, it
    is called with the `src` parameter, which is the directory
    being visited by copytree(), and `names` which is the list of
    `src` contents, as returned by os.listdir():

        callable(src, names) -> ignored_names

    Since copytree() is called recursively, the callable will be
    called once for each directory that is copied. It returns a
    list of names relative to the `src` directory that should
    not be copied.

    XXX Consider this example code rather than the ultimate tool.

    """
    names = os.listdir(src)
    if ignore is not None:
        ignored_names = ignore(src, names)
    else:
        ignored_names = set()

    # Fails if `dst` already exists, as required by the contract above.
    os.makedirs(dst)
    errors = []
    for name in names:
        if name in ignored_names:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                linkto = os.readlink(srcname)
                os.symlink(linkto, dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                # Will raise a SpecialFileError for unsupported file types
                copy2(srcname, dstname)
        # catch the Error from the recursive copytree so that we can
        # continue with other files
        except Error as err:
            errors.extend(err.args[0])
        except EnvironmentError as why:
            errors.append((srcname, dstname, str(why)))
    try:
        copystat(src, dst)
    except OSError as why:
        if WindowsError is not None and isinstance(why, WindowsError):
            # Copying file access times may fail on Windows
            pass
        else:
            errors.append((src, dst, str(why)))
    if errors:
        # The collected (source, destination, reason) entries become args[0].
        raise Error(errors)
def rmtree(path, ignore_errors=False, onerror=None):
    """Recursively delete a directory tree.

    If ignore_errors is set, errors are ignored; otherwise, if onerror
    is set, it is called to handle the error with arguments (func,
    path, exc_info) where func is os.listdir, os.remove, or os.rmdir;
    path is the argument to that function that caused it to fail; and
    exc_info is a tuple returned by sys.exc_info().  If ignore_errors
    is false and onerror is None, an exception is raised.

    """
    if ignore_errors:
        def onerror(*args):
            pass
    elif onerror is None:
        def onerror(*args):
            # Always called from inside an except clause, so a bare raise
            # re-raises the error currently being handled.
            raise
    try:
        if os.path.islink(path):
            # symlinks to directories are forbidden, see bug #1669
            raise OSError("Cannot call rmtree on a symbolic link")
    except OSError:
        onerror(os.path.islink, path, sys.exc_info())
        # can't continue even if onerror hook returns
        return
    names = []
    try:
        names = os.listdir(path)
    except OSError:
        onerror(os.listdir, path, sys.exc_info())
    for name in names:
        fullname = os.path.join(path, name)
        try:
            # lstat() does not follow symlinks, so a link to a directory is
            # removed as a file instead of being descended into.
            mode = os.lstat(fullname).st_mode
        except OSError:
            mode = 0
        if stat.S_ISDIR(mode):
            rmtree(fullname, ignore_errors, onerror)
        else:
            try:
                os.remove(fullname)
            except OSError:
                onerror(os.remove, fullname, sys.exc_info())
    try:
        os.rmdir(path)
    except OSError:
        onerror(os.rmdir, path, sys.exc_info())
# A basename() variant which first strips the trailing slash, if present.
# Thus we always get the last component of the path, even for directories.
sep = os.path.sep + (os.path.altsep or '')
return os.path.basename(path.rstrip(sep))
# NOTE(review): the `def` line, the end of the docstring and several
# statements (the conditions around the raises, the rename and removal calls)
# are not visible in SOURCE; the docstring and __all__ suggest this is
# move(src, dst) -- confirm.
"""Recursively move a file or directory to another location. This is
similar to the Unix "mv" command.
If the destination is a directory or a symlink to a directory, the source
is moved inside the directory. The destination path must not already
If the destination already exists but is not a directory, it may be
overwritten depending on os.rename() semantics.
If the destination is on our current filesystem, then rename() is used.
Otherwise, src is copied to the destination and then removed.
A lot more could be done here... A look at a mv.c shows a lot of
the issues this implementation glosses over.
# We might be on a case insensitive filesystem,
# perform the rename anyway.
# The real target is `dst` joined with the last path component of `src`.
real_dst = os.path.join(dst, _basename(src))
if os.path.exists(real_dst):
# NOTE(review): `raise Error, "..."` is Python 2-only syntax.
raise Error, "Destination path '%s' already exists" % real_dst
raise Error, "Cannot move a directory '%s' into itself '%s'." % (src, dst)
# Symlinks inside the tree are recreated as symlinks, not followed.
copytree(src, real_dst, symlinks=True)
def _destinsrc(src, dst):
if not src.endswith(os.path.sep):
if not dst.endswith(os.path.sep):
return dst.startswith(src)
# NOTE(review): the `def` line and the return statements of this helper are
# not visible in SOURCE; only the docstring and the guard below survive.
"""Returns a gid, given a group name."""
# No lookup is attempted when `getgrnam` is None (presumably meaning the
# lookup function is unavailable -- confirm at its import site) or when no
# name was given.
if getgrnam is None or name is None:
# NOTE(review): the `def` line and the return statements of this helper are
# not visible in SOURCE; only the docstring and the guard below survive.
"""Returns an uid, given a user name."""
# No lookup is attempted when `getpwnam` is None (presumably meaning the
# lookup function is unavailable -- confirm at its import site) or when no
# name was given.
if getpwnam is None or name is None:
# Builds a tar archive of `base_dir` named from `base_name`, optionally
# compressed, using the tarfile module.
# NOTE(review): this block is incomplete in SOURCE -- the end of the docstring,
# the statement that raises for a bad `compress` value, the body of
# _set_uid_gid(), any handling of `dry_run`/`logger is None`, and the return
# are not visible.  Restore them before relying on this function.
def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0,
owner=None, group=None, logger=None):
"""Create a (possibly compressed) tar file from all the files under
'compress' must be "gzip" (the default), "bzip2", or None.
'owner' and 'group' can be used to define an owner and a group for the
archive that is being built. If not provided, the current owner and group
The output tar file will be named 'base_name' + ".tar", possibly plus
the appropriate compression extension (".gz", or ".bz2").
Returns the output filename.
# Maps the `compress` argument to the suffix of the tarfile.open() mode.
tar_compression = {'gzip': 'gz', 'bzip2': 'bz2', None: ''}
# Maps the `compress` argument to the extension appended to the file name.
compress_ext = {'gzip': '.gz', 'bzip2': '.bz2'}
# flags for compression program, each element of list will be an argument
if compress is not None and compress not in compress_ext.keys():
# NOTE(review): bare message expression; the statement that uses it
# (presumably a raise) is not visible.
("bad value for 'compress': must be None, 'gzip' or 'bzip2'")
# No extension is added when `compress` is None.
archive_name = base_name + '.tar' + compress_ext.get(compress, '')
archive_dir = os.path.dirname(archive_name)
# The archive's parent directory may not exist yet.
if archive_dir and not os.path.exists(archive_dir):
logger.info("creating %s", archive_dir)
import tarfile # late import so Python build itself doesn't break
logger.info('Creating tar archive')
# Passed to tar.add() as `filter`, so it is applied to the added entries.
def _set_uid_gid(tarinfo):
# 'w|<suffix>' opens the archive for writing with the selected compression.
tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress])
tar.add(base_dir, filter=_set_uid_gid)
# Creates `zip_filename` from `base_dir` by spawning the external "zip" program
# through distutils.spawn.spawn().
# NOTE(review): this block is incomplete in SOURCE -- the assignment of
# `zipoptions`, the `try:` matching the `except` below, and the statement that
# uses the error message (presumably a raise) are not visible.
def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False):
# XXX see if we want to keep an external call here
# Imported inside the function rather than at module level.
from distutils.errors import DistutilsExecError
from distutils.spawn import spawn
# `dry_run` is forwarded to spawn().
spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run)
except DistutilsExecError:
# XXX really should distinguish between "couldn't find
# external 'zip' command" and "zip failed".
("unable to create zip file '%s': "
"could neither import the 'zipfile' module nor "
"find a standalone zip utility") % zip_filename
# Builds `base_name` + ".zip" from the tree under `base_dir`, using the zipfile
# module or, failing that, the external zip helper.
# NOTE(review): this block is incomplete in SOURCE -- the end of the docstring,
# the import of `zipfile` and its fallback test, the calls that write entries
# into the archive, the loop over `filenames`, any handling of
# `dry_run`/`logger is None`, and the return are not visible.
def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None):
"""Create a zip file from all the files under 'base_dir'.
The output zip file will be named 'base_name' + ".zip". Uses either the
"zipfile" Python module (if available) or the InfoZIP "zip" utility
(if installed and found on the default search path). If neither tool is
available, raises ExecError. Returns the name of the output zip
zip_filename = base_name + ".zip"
archive_dir = os.path.dirname(base_name)
# The archive's parent directory may not exist yet.
if archive_dir and not os.path.exists(archive_dir):
logger.info("creating %s", archive_dir)
# If zipfile module is not available, try spawning an external 'zip'
_call_external_zip(base_dir, zip_filename, verbose, dry_run)
logger.info("creating '%s' and adding '%s' to it",
# The archive is opened for writing with deflate compression.
with zipfile.ZipFile(zip_filename, "w",
compression=zipfile.ZIP_DEFLATED) as zf:
# The top-level directory itself is handled before walking its contents.
path = os.path.normpath(base_dir)
logger.info("adding '%s'", path)
for dirpath, dirnames, filenames in os.walk(base_dir):
# Sub-directories are visited in sorted order.
for name in sorted(dirnames):
path = os.path.normpath(os.path.join(dirpath, name))
logger.info("adding '%s'", path)
path = os.path.normpath(os.path.join(dirpath, name))
logger.info("adding '%s'", path)
# Registry of archive formats.  Each key is a format name and each value is a
# tuple (function that creates the archive, list of (name, value) extra
# arguments passed to that function, human-readable description).
_ARCHIVE_FORMATS = {
    'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"),
    'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"),
    'tar':   (_make_tarball, [('compress', None)], "uncompressed tar file"),
    'zip':   (_make_zipfile, [], "ZIP file"),
}
def get_archive_formats():
    """Returns a list of supported formats for archiving and unarchiving.

    Each element of the returned sequence is a tuple (name, description).
    The list is sorted so the result does not depend on the iteration
    order of the format registry.
    """
    # registry[2] is the description slot of each registry entry.
    formats = [(name, registry[2]) for name, registry in
               _ARCHIVE_FORMATS.items()]
    formats.sort()
    return formats
def register_archive_format(name, function, extra_args=None, description=''):
"""Registers an archive format.
name is the name of the format. function is the callable that will be
used to create archives. If provided, extra_args is a sequence of
(name, value) tuples that will be passed as arguments to the callable.
description can be provided to describe the format, and will be returned
by the get_archive_formats() function.
if not isinstance(function, collections.Callable):
raise TypeError('The %s object is not callable' % function)