"""Interface to the liblzma compression library.

This module provides a class for reading and writing compressed files,
classes for incremental (de)compression, and convenience functions for
one-shot (de)compression.

These classes and functions support both the XZ and legacy LZMA
container formats, as well as raw compressed data streams.
"""

# Public API of this module.  The constants, LZMACompressor,
# LZMADecompressor, LZMAError and is_check_supported are listed here for
# re-export; this module star-imports the _lzma extension module.
__all__ = [
    "CHECK_NONE", "CHECK_CRC32", "CHECK_CRC64", "CHECK_SHA256",
    "CHECK_ID_MAX", "CHECK_UNKNOWN",
    "FILTER_LZMA1", "FILTER_LZMA2", "FILTER_DELTA", "FILTER_X86", "FILTER_IA64",
    "FILTER_ARM", "FILTER_ARMTHUMB", "FILTER_POWERPC", "FILTER_SPARC",
    "FORMAT_AUTO", "FORMAT_XZ", "FORMAT_ALONE", "FORMAT_RAW",
    "MF_HC3", "MF_HC4", "MF_BT2", "MF_BT3", "MF_BT4",
    "MODE_FAST", "MODE_NORMAL", "PRESET_DEFAULT", "PRESET_EXTREME",

    "LZMACompressor", "LZMADecompressor", "LZMAFile", "LZMAError",
    "open", "compress", "decompress", "is_check_supported",
]

import builtins
import io
import os
from _lzma import *
from _lzma import _encode_filter_properties, _decode_filter_properties
import _compression

# Internal state codes stored in LZMAFile._mode.
_MODE_CLOSED = 0
_MODE_READ = 1
# Value 2 no longer used
_MODE_WRITE = 3


class LZMAFile(_compression.BaseStream):

    """A file object providing transparent LZMA (de)compression.

    An LZMAFile can act as a wrapper for an existing file object, or
    refer directly to a named file on disk.

    Note that LZMAFile provides a *binary* file interface - data read
    is returned as bytes, and data to be written must be given as bytes.
    """

    def __init__(self, filename=None, mode="r", *,
                 format=None, check=-1, preset=None, filters=None):
        """Open an LZMA-compressed file in binary mode.

        filename can be either an actual file name (given as a str,
        bytes, or PathLike object), in which case the named file is
        opened, or it can be an existing file object to read from or
        write to.

        mode can be "r" for reading (default), "w" for (over)writing,
        "x" for creating exclusively, or "a" for appending. These can
        equivalently be given as "rb", "wb", "xb" and "ab" respectively.

        format specifies the container format to use for the file.
        If mode is "r", this defaults to FORMAT_AUTO. Otherwise, the
        default is FORMAT_XZ.

        check specifies the integrity check to use. This argument can
        only be used when opening a file for writing. For FORMAT_XZ,
        the default is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not
        support integrity checks - for these formats, check must be
        omitted, or be CHECK_NONE.

        When opening a file for reading, the *preset* argument is not
        meaningful, and should be omitted. The *filters* argument should
        also be omitted, except when format is FORMAT_RAW (in which case
        it is required).

        When opening a file for writing, the settings used by the
        compressor can be specified either as a preset compression
        level (with the *preset* argument), or in detail as a custom
        filter chain (with the *filters* argument). For FORMAT_XZ and
        FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
        level. For FORMAT_RAW, the caller must always specify a filter
        chain; the raw compressor does not support preset compression
        levels.

        preset (if provided) should be an integer in the range 0-9,
        optionally OR-ed with the constant PRESET_EXTREME.

        filters (if provided) should be a sequence of dicts. Each dict
        should have an entry for "id" indicating ID of the filter, plus
        additional entries for options to the filter.

        Raises ValueError for an unsupported mode, or when check/preset
        are given together with a read mode, and TypeError when filename
        is neither a path nor an object with a read or write attribute.
        """
        # Start out in the "closed" state so that close() is a safe no-op
        # if anything below raises before the file is fully set up.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if mode in ("r", "rb"):
            if check != -1:
                raise ValueError("Cannot specify an integrity check "
                                 "when opening a file for reading")
            if preset is not None:
                raise ValueError("Cannot specify a preset compression "
                                 "level when opening a file for reading")
            if format is None:
                format = FORMAT_AUTO
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "a", "ab", "x", "xb"):
            if format is None:
                format = FORMAT_XZ
            mode_code = _MODE_WRITE
            self._compressor = LZMACompressor(format=format, check=check,
                                              preset=preset, filters=filters)
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0
        else:
            raise ValueError("Invalid mode: {!r}".format(mode))

        if isinstance(filename, (str, bytes, os.PathLike)):
            # The underlying file always has to be opened in binary mode.
            if "b" not in mode:
                mode += "b"
            self._fp = builtins.open(filename, mode)
            # We opened it, so close() is responsible for closing it.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: use it, but never close it.
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(
                self._fp, LZMADecompressor,
                trailing_error=LZMAError, format=format, filters=filters)
            self._buffer = io.BufferedReader(raw)

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
                self._buffer = None
            elif self._mode == _MODE_WRITE:
                # Emit whatever the compressor is still holding back.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Always release the underlying file and reset state, even if
            # flushing or closing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, size=-1):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek() always
        # returns at least one byte (except at EOF)
        return self._buffer.peek(size)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b"" if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b"" if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        self._check_can_read()
        return self._buffer.readline(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # Accept any object supporting the buffer protocol.  len() on
            # such an object counts items, not bytes (e.g. array('I')), so
            # take the byte count from a memoryview instead.
            data = memoryview(data)
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Possible values for whence are:

            0: start of stream (default): offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        # Write mode: position is the running count of uncompressed bytes.
        return self._pos

def open(filename, mode="rb", *,
         format=None, check=-1, preset=None, filters=None,
         encoding=None, errors=None, newline=None):
    """Open an LZMA-compressed file in binary or text mode.

    filename can be either an actual file name (given as a str, bytes,
    or PathLike object), in which case the named file is opened, or it
    can be an existing file object to read from or write to.

    The mode argument can be "r", "rb" (default), "w", "wb", "x", "xb",
    "a", or "ab" for binary mode, or "rt", "wt", "xt", or "at" for text
    mode.

    The format, check, preset and filters arguments specify the
    compression settings, as for LZMACompressor, LZMADecompressor and
    LZMAFile.

    For binary mode, this function is equivalent to the LZMAFile
    constructor: LZMAFile(filename, mode, ...). In this case, the
    encoding, errors and newline arguments must not be provided.

    For text mode, an LZMAFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if
    encoding, errors or newline is given for a binary mode.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # LZMAFile only understands binary modes; drop the text flag.
    lz_mode = mode.replace("t", "")
    binary_file = LZMAFile(filename, lz_mode, format=format, check=check,
                           preset=preset, filters=filters)

    if not text_mode:
        return binary_file

    try:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    except BaseException:
        # Wrapping failed (e.g. unknown encoding): do not leave the file
        # we just opened dangling until garbage collection.
        binary_file.close()
        raise

def compress(data, format=FORMAT_XZ, check=-1, preset=None, filters=None):
    """Compress a block of data.

    Refer to LZMACompressor's docstring for a description of the
    optional arguments *format*, *check*, *preset* and *filters*.

    For incremental compression, use an LZMACompressor instead.

    Returns the complete compressed stream as a bytes object.
    """
    comp = LZMACompressor(format, check, preset, filters)
    # flush() finishes the stream; without it the output is truncated.
    return comp.compress(data) + comp.flush()

def decompress(data, format=FORMAT_AUTO, memlimit=None, filters=None):
    """Decompress a block of data.

    Refer to LZMADecompressor's docstring for a description of the
    optional arguments *format*, *memlimit* and *filters*.

    data may consist of several concatenated compressed streams; their
    decompressed contents are joined together. Trailing bytes that do
    not form a valid stream are ignored once at least one stream has
    been decoded.

    Raises LZMAError if the first stream is invalid, or if the data
    ends before a stream's end-of-stream marker.

    For incremental decompression, use an LZMADecompressor instead.
    """
    results = []
    while True:
        # A decompressor handles exactly one stream, so a fresh one is
        # needed for every concatenated stream.
        decomp = LZMADecompressor(format, memlimit, filters)
        try:
            res = decomp.decompress(data)
        except LZMAError:
            if results:
                break  # Leftover data is not a valid LZMA/XZ stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise LZMAError("Compressed data ended before the "
                            "end-of-stream marker was reached")
        # Whatever follows the finished stream is fed to the next round.
        data = decomp.unused_data
        if not data:
            break
    return b"".join(results)