#-------------------------------------------------------------------
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""Read from and write to tar format archives.
__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)"
__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."
from builtins import open as bltn_open
# Exception types associated with a failing os.symlink call:
# os.symlink on Windows prior to 6.0 raises NotImplementedError, and
# OSError (winerror=1314) is raised if the caller does not hold the
# SeCreateSymbolicLinkPrivilege privilege.
symlink_exception = (AttributeError, NotImplementedError, OSError)
# Names exported by a star-import of this module.
__all__ = [
    # classes and helper functions
    "TarFile", "TarInfo", "is_tarfile",
    # exception types
    "TarError", "ReadError", "CompressionError", "StreamError",
    "ExtractError", "HeaderError",
    # encoding and archive format constants
    "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
    "DEFAULT_FORMAT",
    # module-level convenience entry point
    "open",
]
# If true, use the safer (but backwards-incompatible) 'tar' extraction filter,
# rather than 'fully_trusted', by default.
# The emitted warning is changed to match.
# System-wide configuration file
_CONFIG_FILENAME = '/etc/python/tarfile.cfg'
#---------------------------------------------------------
#---------------------------------------------------------
NUL = b"\0"                     # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
# Both magic strings are 8 bytes long: they cover the 6-byte magic field
# and the 2-byte version field that directly follows it in a header.
GNU_MAGIC = b"ustar  \0"        # magic gnu tar string ("ustar", 2 spaces, NUL)
POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string ("ustar", NUL, "00")

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field
# Type flag values: each is a single byte naming the kind of archive member.
REGTYPE = b"0"  # regular file
AREGTYPE = b"\0"  # regular file (NUL byte instead of b"0")
LNKTYPE = b"1"  # link (inside tarfile)
SYMTYPE = b"2"  # symbolic link
CHRTYPE = b"3"  # character special device
BLKTYPE = b"4"  # block special device
DIRTYPE = b"5"  # directory
FIFOTYPE = b"6"  # fifo special device
CONTTYPE = b"7"  # contiguous file
# GNU tar specific type flags.
GNUTYPE_LONGNAME = b"L"  # GNU tar longname
GNUTYPE_LONGLINK = b"K"  # GNU tar longlink
GNUTYPE_SPARSE = b"S"  # GNU tar sparse file
# Extended (pax-style) header type flags.
XHDTYPE = b"x"  # POSIX.1-2001 extended header
XGLTYPE = b"g"  # POSIX.1-2001 global header
SOLARIS_XHDTYPE = b"X"  # Solaris extended header
# Integer identifiers for the archive formats.
USTAR_FORMAT = 0  # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1  # GNU tar format
PAX_FORMAT = 2  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = PAX_FORMAT  # the pax format is the default
#---------------------------------------------------------
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)
# Pax header keywords whose values override the TarInfo attribute
# of the same meaning.
PAX_FIELDS = (
    "path",
    "linkpath",
    "size",
    "mtime",
    "uid",
    "gid",
    "uname",
    "gname",
)

# Pax header keywords whose values are affected by hdrcharset.
PAX_NAME_FIELDS = {"gname", "linkpath", "path", "uname"}
# Fields in a pax header that are numbers, all other fields
# are treated as strings.
#---------------------------------------------------------
#---------------------------------------------------------
# Module-level encoding constant, taken from the interpreter's
# filesystem encoding.
ENCODING = sys.getfilesystemencoding()
#---------------------------------------------------------
#---------------------------------------------------------
def stn(s, length, encoding, errors):
    """Convert a string to a null-terminated bytes object.

    The string is encoded with *encoding* and the *errors* handler, cut
    to at most *length* bytes and right-padded with NUL bytes, so the
    result is exactly *length* bytes long.

    Raises ValueError if *s* is None.
    """
    if s is None:
        raise ValueError("metadata cannot contain None")
    encoded = s.encode(encoding, errors)
    # A negative repeat count yields b"", so over-long input is simply
    # truncated without any padding.
    return encoded[:length] + (length - len(encoded)) * NUL
def nts(s, encoding, errors):
    """Convert a null-terminated bytes object to a string.

    Only the bytes before the first NUL are decoded (with *encoding* and
    the *errors* handler); the terminator and any padding after it are
    dropped. Input without a NUL byte is decoded in full.
    """
    end = s.find(b"\0")
    if end != -1:
        s = s[:end]
    return s.decode(encoding, errors)
"""Convert a number field to a python number.
# There are two possible encodings for a number field, see
if s[0] in (0o200, 0o377):
for i in range(len(s) - 1):
n = -(256 ** (len(s) - 1) - n)
s = nts(s, "ascii", "strict")
n = int(s.strip() or "0", 8)
raise InvalidHeaderError("invalid header")
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field.

    Returns a bytes object of exactly *digits* bytes. *n* is converted
    with int() first, so float values are truncated.

    Raises ValueError if *n* does not fit into the field for the given
    *format*.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0o200 or 0o377 byte indicates this
    # particular encoding, the following digits-1 bytes are a big-endian
    # base-256 representation. This allows values up to (256**(digits-1))-1.
    # A 0o200 byte indicates a positive number, a 0o377 byte a negative
    # number.
    n = int(n)
    if 0 <= n < 8 ** (digits - 1):
        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL
    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):
        marker = 0o200 if n >= 0 else 0o377
        # Reducing modulo 256**(digits-1) leaves non-negative values alone
        # and maps a negative n to n + 256**(digits-1), i.e. the payload
        # that follows the 0o377 marker.
        payload = n % 256 ** (digits - 1)
        s = bytes([marker]) + payload.to_bytes(digits - 1, "big")
    else:
        raise ValueError("overflow in number field")
    return s
"""Calculate the checksum for a member's header by summing up all
characters except for the chksum field which is treated as if
it was filled with spaces. According to the GNU tar sources,
some tars (Sun and NeXT) calculate chksum with signed char,
which will be different if there are chars in the buffer with
the high bit set. So we calculate two checksums, unsigned and
unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))
signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, copy the entire content. If length is 0, nothing
    is read or written. Data is transferred in chunks of *bufsize* bytes
    (16 KiB when bufsize is None or 0).

    Raises *exception* with the message "unexpected end of data" if src
    is exhausted before *length* bytes have been copied.
    """
    bufsize = bufsize or 16 * 1024
    if length == 0:
        return
    if length is None:
        shutil.copyfileobj(src, dst, bufsize)
        return

    blocks, remainder = divmod(length, bufsize)
    # Copy the full-sized chunks first ...
    for _ in range(blocks):
        buf = src.read(bufsize)
        if len(buf) < bufsize:
            raise exception("unexpected end of data")
        dst.write(buf)

    # ... then whatever is left over.
    if remainder != 0:
        buf = src.read(remainder)
        if len(buf) < remainder:
            raise exception("unexpected end of data")
        dst.write(buf)
    return
encoding = getattr(sys.stdout, 'encoding', None)
s = s.encode(encoding, 'backslashreplace').decode(encoding)
class TarError(Exception):
    """Base exception for all errors defined in this module."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
#---------------------------
# internal stream interface
#---------------------------
"""Low-level file object. Supports reading and writing.
It is used instead of a regular file object for streaming
def __init__(self, name, mode):
"w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
if hasattr(os, "O_BINARY"):
self.fd = os.open(name, mode, 0o666)
return os.read(self.fd, size)
"""Class that serves as an adapter between TarFile and
a stream-like object. The stream-like object only
needs to have a read() or write() method and is accessed
blockwise. Use of gzip or bzip2 compression is possible.
A stream-like object could be for example: sys.stdin,
sys.stdout, a socket, a tape device etc.
_Stream is intended to be used only internally.
def __init__(self, name, mode, comptype, fileobj, bufsize):
"""Construct a _Stream object.
fileobj = _LowLevelFile(name, mode)
# Enable transparent compression detection for the
fileobj = _StreamProxy(fileobj)
comptype = fileobj.getcomptype()
raise CompressionError("zlib module is not available")
self.crc = zlib.crc32(b"")
self.exception = zlib.error
raise CompressionError("bz2 module is not available")
self.cmp = bz2.BZ2Decompressor()
self.cmp = bz2.BZ2Compressor()
raise CompressionError("lzma module is not available")
self.cmp = lzma.LZMADecompressor()
self.exception = lzma.LZMAError
self.cmp = lzma.LZMACompressor()
raise CompressionError("unknown compression type %r" % comptype)
if hasattr(self, "closed") and not self.closed:
def _init_write_gz(self):
    """Initialize for writing with gzip compression.

    Creates the compressor in self.cmp and writes the gzip member
    header, followed by the NUL-terminated file name, through
    self.__write(). self.name is reduced to its base name, without a
    trailing ".gz", as a side effect.
    """
    # Negative wbits selects a raw deflate stream without a zlib/gzip
    # wrapper; the gzip header is written by hand below.
    self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                     -self.zlib.MAX_WBITS,
                                     self.zlib.DEF_MEM_LEVEL,
                                     0)
    timestamp = struct.pack("<L", int(time.time()))
    # RFC1952 layout: ID1 ID2, CM=8 (deflate), FLG=8 (FNAME present),
    # MTIME (4 bytes, little-endian), XFL=2, OS=255 (unknown).
    self.__write(b"\037\213\010\010" + timestamp + b"\002\377")
    if self.name.endswith(".gz"):
        self.name = self.name[:-3]
    # Honor "directory components removed" from RFC1952
    self.name = os.path.basename(self.name)
    # RFC1952 says we must use ISO-8859-1 for the FNAME field.
    self.__write(self.name.encode("iso-8859-1", "replace") + NUL)
"""Write string s to the stream.
if self.comptype == "gz":
self.crc = self.zlib.crc32(s, self.crc)
if self.comptype != "tar":
"""Write string s to the stream if a whole new block
while len(self.buf) > self.bufsize:
self.fileobj.write(self.buf[:self.bufsize])
self.buf = self.buf[self.bufsize:]
"""Close the _Stream object. No operation should be
if self.mode == "w" and self.comptype != "tar":
self.buf += self.cmp.flush()
if self.mode == "w" and self.buf:
self.fileobj.write(self.buf)
if self.comptype == "gz":
self.fileobj.write(struct.pack("<L", self.crc))
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))
"""Initialize for reading a gzip compressed fileobj.
self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
# taken from gzip.GzipFile with some alterations
if self.__read(2) != b"\037\213":
raise ReadError("not a gzip file")
if self.__read(1) != b"\010":
raise CompressionError("unsupported compression method")
flag = ord(self.__read(1))
xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))