"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

# Names exported by "from bz2 import *".
__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"

# The module defines its own open(), so keep a private handle on the
# builtin for opening the underlying (compressed) file.
from builtins import open as _builtin_open
import io
import os
import warnings
import _compression

# Fall back to dummy_threading when the threading module cannot be
# imported.
try:
    from threading import RLock
except ImportError:
    from dummy_threading import RLock

from _bz2 import BZ2Compressor, BZ2Decompressor

# Internal states of a BZ2File, stored in BZ2File._mode.
_MODE_CLOSED = 0
_MODE_READ = 1
# Value 2 no longer used
_MODE_WRITE = 3


class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", buffering=None, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        buffering is ignored. Its use is deprecated.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an out-of-range compresslevel or an
        unrecognised mode, and TypeError if filename is neither a path
        nor an object with a read() or write() attribute.
        """
        # This lock must be recursive, so that BufferedIOBase's
        # writelines() does not deadlock.
        self._lock = RLock()
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if buffering is not None:
            # stacklevel=2 attributes the warning to the code that
            # passed 'buffering', not to this constructor.
            warnings.warn("Use of 'buffering' argument is deprecated",
                          DeprecationWarning, stacklevel=2)

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        # Normalise the mode to its binary spelling for the builtin
        # open() call below; every writing mode needs a compressor.
        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb"):
            mode = "wb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("x", "xb"):
            mode = "xb"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        elif mode in ("a", "ab"):
            mode = "ab"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: it is used but never closed
            # by this class (_closefp stays False).
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            # Number of uncompressed bytes written so far; see tell().
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        with self._lock:
            if self._mode == _MODE_CLOSED:
                return
            try:
                if self._mode == _MODE_READ:
                    self._buffer.close()
                elif self._mode == _MODE_WRITE:
                    # Emit whatever the compressor is still holding.
                    self._fp.write(self._compressor.flush())
                    self._compressor = None
            finally:
                # Nested finally: the state is reset even if flushing
                # or closing the underlying file raises.
                try:
                    if self._closefp:
                        self._fp.close()
                finally:
                    self._fp = None
                    self._closefp = False
                    self._mode = _MODE_CLOSED
                    self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            # Relies on the undocumented fact that BufferedReader.peek()
            # always returns at least one byte (except at EOF), independent
            # of the value of n
            return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size < 0:
                size = io.DEFAULT_BUFFER_SIZE
            return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.
        """
        # Accept any object implementing __index__, not just int.
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.
        """
        # Accept any object implementing __index__, not just int.
        if not isinstance(size, int):
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        with self._lock:
            self._check_can_read()
            return self._buffer.readlines(size)

    def write(self, data):
        """Write a bytes-like object to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        with self._lock:
            self._check_can_write()
            if isinstance(data, (bytes, bytearray)):
                length = len(data)
            else:
                # Accept any object supporting the buffer protocol and
                # count bytes rather than items: len() of an array of
                # multi-byte items is the item count, not the byte count.
                data = memoryview(data)
                length = data.nbytes

            compressed = self._compressor.compress(data)
            self._fp.write(compressed)
            self._pos += length
            return length

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        seq can be any iterable yielding byte strings. The return value
        is whatever the inherited writelines() returns (None for the
        standard io base classes).

        Line separators are not added between the written byte strings.
        """
        # Hold the lock for the whole sequence; the inherited method
        # calls write() per item, which is why the lock is recursive.
        with self._lock:
            return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        with self._lock:
            self._check_can_seek()
            return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        with self._lock:
            self._check_not_closed()
            if self._mode == _MODE_READ:
                return self._buffer.tell()
            # Write mode: count of uncompressed bytes written so far.
            return self._pos

def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes, or
    PathLike object), or an existing file object to read from or write
    to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if a
    text-only argument is given in binary mode.
    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # BZ2File only understands binary modes, so drop the "t" flag and
    # add the text layer on top afterwards.
    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file

def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    # flush() emits the data still buffered inside the compressor, so
    # the concatenation is one complete stream.
    return comp.compress(data) + comp.flush()

def decompress(data):
    """Decompress a block of data.

    data may hold several concatenated compressed streams; their
    decompressed contents are joined in order. Trailing data that is
    not a valid stream is ignored once at least one stream has been
    decompressed.

    Raises OSError if the data does not start with a valid stream, and
    ValueError if a stream ends before its end-of-stream marker.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        # A decompressor handles a single stream, so use a fresh one
        # for every stream found in the input.
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the end of this stream is the next candidate.
        data = decomp.unused_data
    return b"".join(results)