# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
#-------------------------------------------------------------------
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""Read from and write to tar format archives."""
__version__ = "$Revision: 85213 $"
from __builtin__ import open as bltn_open
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
#---------------------------------------------------------
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# The magic and version fields of a tar header occupy 8 bytes together,
# so both magic strings must be exactly 8 characters long.  The two
# blanks in the GNU string are spelled \x20 so that whitespace
# normalisation can never shorten the literal.
GNU_MAGIC = "ustar\x20\x20\0"   # magic gnu tar string: "ustar", 2 spaces, NUL
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string: "ustar", NUL, "00"

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
# Type flags: one character per kind of archive member.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file (NUL flag)
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# Type flags specific to GNU tar.
GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

# Type flags for extended headers.
XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Identifiers of the archive formats; GNU tar is the default.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
#---------------------------------------------------------
#---------------------------------------------------------
# File types that tarfile supports:
# NOTE(review): the closing element of this tuple was missing; it ended
# on a dangling comma.  GNUTYPE_SPARSE is restored because it is the only
# type flag listed in REGULAR_TYPES that was absent here -- confirm.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that will be treated as a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# File types that are part of the GNU tar format.
# NOTE(review): same truncation as SUPPORTED_TYPES; GNUTYPE_SPARSE is the
# remaining GNUTYPE_* constant and is restored as the last element.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)
# Fields in a pax header that are numbers, all other fields
# are treated as strings.
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
# The values are written with the 0o prefix: it is accepted by Python 2.6
# and later, whereas the bare leading-zero form is a syntax error on
# Python 3.  The numeric values are unchanged.

# File type bits.
S_IFLNK = 0o120000              # symbolic link
S_IFREG = 0o100000              # regular file
S_IFBLK = 0o060000              # block device
S_IFDIR = 0o040000              # directory
S_IFCHR = 0o020000              # character device

# Set-id bits.
TSUID = 0o4000                  # set UID on execution
TSGID = 0o2000                  # set GID on execution

# Permission bits for owner, group and other.
TUREAD = 0o400                  # read by owner
TUWRITE = 0o200                 # write by owner
TUEXEC = 0o100                  # execute/search by owner
TGREAD = 0o040                  # read by group
TGWRITE = 0o020                 # write by group
TGEXEC = 0o010                  # execute/search by group
TOREAD = 0o004                  # read by other
TOWRITE = 0o002                 # write by other
TOEXEC = 0o001                  # execute/search by other
#---------------------------------------------------------
#---------------------------------------------------------
# Module-wide default encoding: the file system encoding when the
# interpreter reports one, otherwise the interpreter's default encoding.
# The guard matters: without it the second assignment always replaces
# the first and the file system encoding is never used.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
#---------------------------------------------------------
#---------------------------------------------------------
"""Convert a python string to a null-terminated string buffer.
return s[:length] + (length - len(s)) * NUL
"""Convert a null-terminated string field to a python string.
# Use the string up to the first null char.
"""Convert a number field to a python number.
# There are two possible encodings for a number field, see
n = int(nts(s).strip() or "0", 8)
raise InvalidHeaderError("invalid header")
for i in xrange(len(s) - 1):
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    n is the integer to store, digits the width of the field and format
    one of the *_FORMAT constants.  The result is always `digits`
    characters long.  ValueError is raised when n does not fit into the
    field in the requested format.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        return "%0*o" % (digits - 1, n) + NUL

    # Everything below is the GNU base-256 encoding.
    if format != GNU_FORMAT or n >= 256 ** (digits - 1):
        raise ValueError("overflow in number field")

    # NOTE(review): the guard for this conversion was missing from the
    # damaged source; `n < 0` follows from the comment below -- confirm.
    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Emit digits-1 bytes, least significant first, prepending each one
    # so that the final string is big-endian.
    s = ""
    for i in range(digits - 1):
        s = chr(n & 0o377) + s
        n >>= 8
    return chr(0o200) + s
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

    s is encoded with `encoding` using the error handler `errors`.  The
    additional handler name "utf-8" encodes every character that
    `encoding` cannot represent as its UTF-8 byte sequence instead.
    """
    # NOTE(review): the test selecting the special handler was missing
    # from the damaged source; the name "utf-8" is inferred from the
    # comment below -- confirm.
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # An extra error handler similar to the -o invalid=UTF-8 option
    # in POSIX.1-2001. Replace untranslatable characters with their
    # UTF-8 representation.
    try:
        # Fast path: the whole string is representable.
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        # Slow path: encode character by character so that only the
        # offending characters are replaced.
        pieces = []
        for c in s:
            try:
                pieces.append(c.encode(encoding, "strict"))
            except UnicodeEncodeError:
                pieces.append(c.encode("utf8"))
        return b"".join(pieces)
"""Calculate the checksum for a member's header by summing up all
characters except for the chksum field which is treated as if
it was filled with spaces. According to the GNU tar sources,
some tars (Sun and NeXT) calculate chksum with signed char,
which will be different if there are chars in the buffer with
the high bit set. So we calculate two checksums, unsigned and
unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.
    If length is None, copy the entire content.

    IOError("end of file reached") is raised when src is exhausted
    before length bytes have been copied.  A length of 0 copies nothing.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return

    # NOTE(review): the assignment of BUFSIZE was missing from the damaged
    # source; the chunk size only affects buffering, not the bytes
    # copied -- confirm the intended value.
    BUFSIZE = 16 * 1024

    # Copy full chunks followed by one short chunk for the remainder; a
    # short read anywhere means the source ended prematurely.
    remaining = length
    while remaining > 0:
        want = min(BUFSIZE, remaining)
        buf = src.read(want)
        if len(buf) < want:
            raise IOError("end of file reached")
        dst.write(buf)
        remaining -= want
"""Convert a file's mode to a string of the form
for table in filemode_table:
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
#---------------------------
# internal stream interface
#---------------------------
"""Low-level file object. Supports reading and writing.
It is used instead of a regular file object for streaming
def __init__(self, name, mode):
    """Open `name` with os.open() and store the descriptor in self.fd.

    mode is a one-letter key that is translated into os.open() flags.
    """
    # NOTE(review): only the "w" entry survived in the damaged source; the
    # "r" entry and the enclosing dict lookup are restored -- confirm.
    flags = {
        "r": os.O_RDONLY,
        "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    }[mode]
    # O_BINARY only exists on some platforms; add it where it is available.
    if hasattr(os, "O_BINARY"):
        flags |= os.O_BINARY
    # 0o666 is the creation mode passed to os.open().
    self.fd = os.open(name, flags, 0o666)
return os.read(self.fd, size)
"""Class that serves as an adapter between TarFile and
a stream-like object. The stream-like object only
needs to have a read() or write() method and is accessed
blockwise. Use of gzip or bzip2 compression is possible.
A stream-like object could be for example: sys.stdin,
sys.stdout, a socket, a tape device etc.
_Stream is intended to be used only internally.
def __init__(self, name, mode, comptype, fileobj, bufsize):
"""Construct a _Stream object.
fileobj = _LowLevelFile(name, mode)
# Enable transparent compression detection for the
fileobj = _StreamProxy(fileobj)
comptype = fileobj.getcomptype()
raise CompressionError("zlib module is not available")
self.crc = zlib.crc32("") & 0xffffffffL
raise CompressionError("bz2 module is not available")
self.cmp = bz2.BZ2Decompressor()
self.cmp = bz2.BZ2Compressor()
if hasattr(self, "closed") and not self.closed:
def _init_write_gz(self):
    """Initialize for writing with gzip compression.

    Creates the compressor in self.cmp and writes the gzip header,
    including the NUL-terminated file name, through self.__write().
    """
    # NOTE(review): the trailing arguments of this call were missing from
    # the damaged source; they are restored here -- confirm.  A negative
    # wbits value makes zlib emit a raw deflate stream.
    self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                     -self.zlib.MAX_WBITS,
                                     self.zlib.DEF_MEM_LEVEL,
                                     0)
    # Fixed header bytes (RFC 1952): magic \037\213, method 8 (deflate),
    # flags 8 (file name present), 4-byte little-endian mtime, extra
    # flags 2, operating system 255.
    timestamp = struct.pack("<L", long(time.time()))
    self.__write("\037\213\010\010%s\002\377" % timestamp)
    # The header stores the name as a byte string.
    if isinstance(self.name, unicode):
        self.name = self.name.encode("iso-8859-1", "replace")
    # Write the name without its ".gz" suffix.
    if self.name.endswith(".gz"):
        self.name = self.name[:-3]
    self.__write(self.name + NUL)
"""Write string s to the stream.
if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
if self.comptype != "tar":
"""Write string s to the stream if a whole new block
while len(self.buf) > self.bufsize:
self.fileobj.write(self.buf[:self.bufsize])
self.buf = self.buf[self.bufsize:]
"""Close the _Stream object. No operation should be
if self.mode == "w" and self.comptype != "tar":
self.buf += self.cmp.flush()
if self.mode == "w" and self.buf: