"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import __builtin__
import errno
import io
import os
import struct
import sys
import time
import zlib

# Public names exported by a star-import of this module.
__all__ = ["GzipFile", "open"]

# Bit masks tested against the flag byte of a gzip member header
# (see RFC 1952).
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode to record whether the stream was opened
# for reading or for writing.
READ, WRITE = 1, 2


def write32u(output, value):
    """Write ``value`` to ``output`` as a 4-byte little-endian integer.

    ``output`` is any object with a ``write()`` method.  ``value`` is
    packed with the struct format ``"<L"`` (little-endian, standard-size
    unsigned long), so it has to fit in 32 unsigned bits.
    """
    output.write(struct.pack("<L", value))


def read32(input):
    """Read 4 bytes from ``input`` and return them as an unsigned integer.

    The bytes are interpreted with the struct format ``"<I"``
    (little-endian unsigned int).  ``struct.unpack`` raises
    ``struct.error`` when ``input.read(4)`` returns fewer than 4 bytes.
    """
    return struct.unpack("<I", input.read(4))[0]


def open(filename, mode="rb", compresslevel=9):
    """Shorthand for GzipFile(filename, mode, compresslevel).

    The filename argument is required; mode defaults to 'rb'
    and compresslevel defaults to 9.

    """
    return GzipFile(filename, mode, compresslevel)


class GzipFile(io.BufferedIOBase):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the readinto() and truncate() methods.

    """

    # File object opened by this instance itself (only a filename was given).
    # close() closes it; it stays None when the caller supplied fileobj.
    myfileobj = None
    # Upper bound for the chunk size that read() asks _read() to fetch.
    max_read_chunk = 10 * 1024 * 1024   # 10Mb

    def __init__(self, filename=None, mode=None,
                 compresslevel=9, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, a StringIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
        depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        Be aware that only the 'rb', 'ab', and 'wb' values should be used
        for cross-platform portability.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the stream when compressing.  All gzip compressed streams
        are required to contain a timestamp.  If omitted or None, the
        current time is used.  This module ignores the timestamp when
        decompressing; however, some programs, such as gunzip, make use
        of it.  The format of the timestamp is the same as that of the
        return value of time.time() and of the st_mtime member of the
        object returned by os.stat().

        """

        # Make sure we don't inadvertently enable universal newlines on the
        # underlying file object - in read mode, this causes data corruption.
        if mode:
            mode = mode.replace('U', '')
        # guarantee the file is opened in binary mode on platforms
        # that care about that sort of thing
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
        if filename is None:
            # Issue #13781: os.fdopen() creates a fileobj with a bogus name
            # attribute. Avoid saving this in the gzip header's filename field.
            filename = getattr(fileobj, 'name', '')
            if not isinstance(filename, basestring) or filename == '<fdopen>':
                filename = ''
        if mode is None:
            if hasattr(fileobj, 'mode'):
                mode = fileobj.mode
            else:
                mode = 'rb'

        if mode[0:1] == 'r':
            self.mode = READ
            # Set flag indicating start of a new member
            self._new_member = True
            # Buffer data read from gzip file. extrastart is offset in
            # stream where buffer starts. extrasize is number of
            # bytes remaining in buffer from current stream position.
            self.extrabuf = ""
            self.extrasize = 0
            self.extrastart = 0
            self.name = filename
            # Starts small, scales exponentially
            self.min_readsize = 100

        elif mode[0:1] == 'w' or mode[0:1] == 'a':
            self.mode = WRITE
            self._init_write(filename)
            # Negative wbits asks zlib for a raw deflate stream: the gzip
            # header and trailer are written by this class itself (see
            # _write_gzip_header() and close()).
            self.compress = zlib.compressobj(compresslevel,
                                             zlib.DEFLATED,
                                             -zlib.MAX_WBITS,
                                             zlib.DEF_MEM_LEVEL,
                                             zlib.Z_DEFAULT_STRATEGY)
        else:
            raise IOError("Mode " + mode + " not supported")

        self.fileobj = fileobj
        self.offset = 0
        self.mtime = mtime

        if self.mode == WRITE:
            self._write_gzip_header()

    @property
    def filename(self):
        """Deprecated alias for ``name``; emits a DeprecationWarning.

        In write mode a ".gz" suffix is appended when ``name`` does not
        already end with it.
        """
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    def __repr__(self):
        """Return '<gzip ...>' built from the underlying file object's repr
        (without its first and last character) and this object's id."""
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _check_closed(self):
        """Raises a ValueError if the underlying file object has been closed.

        """
        if self.closed:
            raise ValueError('I/O operation on closed file.')

    def _init_write(self, filename):
        """Reset the per-stream state used while writing: name, running
        CRC, uncompressed size and the write buffer bookkeeping."""
        self.name = filename
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0
        self.writebuf = []
        self.bufsize = 0

    def _write_gzip_header(self):
        """Write the gzip member header to self.fileobj.

        The header consists of the magic bytes, the compression method,
        the flag byte, the modification time, two fixed bytes and, when
        one can be derived from self.name, the NUL-terminated file name.
        """
        self.fileobj.write('\037\213')             # magic header
        self.fileobj.write('\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, str):
                fname = fname.encode('latin-1')
            if fname.endswith('.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = ''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags))
        mtime = self.mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, long(mtime))
        self.fileobj.write('\002')                 # extra-flags byte
        self.fileobj.write('\377')                 # OS byte (255)
        if fname:
            self.fileobj.write(fname + '\000')

    def _init_read(self):
        """Reset the running CRC and size before reading a new member."""
        self.crc = zlib.crc32("") & 0xffffffff
        self.size = 0

    def _read_gzip_header(self):
        """Consume one gzip member header from self.fileobj.

        Stores the header's timestamp in self.mtime and skips every
        optional field announced by the flag byte.  Raises IOError when
        the magic bytes or the compression method are not the expected
        ones.
        """
        magic = self.fileobj.read(2)
        if magic != '\037\213':
            raise IOError('Not a gzipped file')
        method = ord(self.fileobj.read(1))
        if method != 8:
            raise IOError('Unknown compression method')
        flag = ord(self.fileobj.read(1))
        self.mtime = read32(self.fileobj)
        # Skip the extra-flags byte and the OS byte; neither is used.
        self.fileobj.read(2)

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            xlen = ord(self.fileobj.read(1))
            xlen = xlen + 256*ord(self.fileobj.read(1))
            self.fileobj.read(xlen)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self.fileobj.read(1)
                if not s or s == '\000':
                    break
        if flag & FHCRC:
            self.fileobj.read(2)     # Read & discard the 16-bit header CRC

    def write(self, data):
        """Compress ``data`` and write it to the underlying file object.

        Updates the running CRC, uncompressed size and stream offset, and
        returns len(data).  Raises ValueError on a closed object and
        IOError(EBADF) when the object was opened for reading.
        """
        self._check_closed()
        if self.mode != WRITE:
            raise IOError(errno.EBADF, "write() on read-only GzipFile object")

        # Normally already caught by _check_closed() above.
        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        # Convert data type if called by io.BufferedWriter.
        if isinstance(data, memoryview):
            data = data.tobytes()

        if len(data) > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += len(data)
            self.crc = zlib.crc32(data, self.crc) & 0xffffffff
            self.offset += len(data)

        return len(data)

    def read(self, size=-1):
        """Return up to ``size`` bytes of uncompressed data.

        A negative ``size`` reads until the end of the stream.  Fewer
        bytes than requested are returned when the end of the stream is
        reached first.  Raises ValueError on a closed object and
        IOError(EBADF) when the object was opened for writing.
        """
        self._check_closed()
        if self.mode != READ:
            raise IOError(errno.EBADF, "read() on write-only GzipFile object")

        if self.extrasize <= 0 and self.fileobj is None:
            return ''

        # The compressed chunk size doubles on every _read() call, capped
        # at max_read_chunk.
        readsize = 1024
        if size < 0:        # get the whole thing
            try:
                while True:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                size = self.extrasize
        else:               # just get some more of it
            try:
                while size > self.extrasize:
                    self._read(readsize)
                    readsize = min(self.max_read_chunk, readsize * 2)
            except EOFError:
                if size > self.extrasize:
                    size = self.extrasize

        offset = self.offset - self.extrastart
        chunk = self.extrabuf[offset: offset + size]
        self.extrasize = self.extrasize - size

        self.offset += size
        return chunk

    def _unread(self, buf):
        """Push ``buf`` back: only the counters move, because the bytes
        are still present in self.extrabuf."""
        self.extrasize = len(buf) + self.extrasize
        self.offset -= len(buf)

    def _read(self, size=1024):
        """Read ``size`` compressed bytes, decompress them and append the
        result to the internal buffer.

        Raises EOFError once the underlying file is exhausted.
        """
        if self.fileobj is None:
            raise EOFError("Reached EOF")

        if self._new_member:
            # If the _new_member flag is set, we have to
            # jump to the next member, if there is one.

            # First, check if we're at the end of the file;
            # if so, it's time to stop; no more members to read.
            pos = self.fileobj.tell()   # Save current position
            self.fileobj.seek(0, 2)     # Seek to end of file
            if pos == self.fileobj.tell():
                raise EOFError("Reached EOF")
            else:
                self.fileobj.seek(pos)  # Return to original position

            self._init_read()
            self._read_gzip_header()
            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
            self._new_member = False

        # Read a chunk of data from the file
        buf = self.fileobj.read(size)

        # If the EOF has been reached, flush the decompression object
        # and mark this object as finished.

        if buf == "":
            uncompress = self.decompress.flush()
            self._read_eof()
            self._add_read_data(uncompress)
            raise EOFError('Reached EOF')

        uncompress = self.decompress.decompress(buf)
        self._add_read_data(uncompress)

        if self.decompress.unused_data != "":
            # Ending case: we've come to the end of a member in the file,
            # so seek back to the start of the unused data, finish up
            # this member, and read a new gzip header.
            # (The number of bytes to seek back is the length of the unused
            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
            self.fileobj.seek(-len(self.decompress.unused_data)+8, 1)

            # Check the CRC and file size, and set the flag so we read
            # a new member on the next call
            self._read_eof()
            self._new_member = True

    def _add_read_data(self, data):
        """Fold ``data`` into the running CRC and size and append it to
        the buffer, dropping the part of the buffer already consumed."""
        self.crc = zlib.crc32(data, self.crc) & 0xffffffff
        offset = self.offset - self.extrastart
        self.extrabuf = self.extrabuf[offset:] + data
        self.extrasize = self.extrasize + len(data)
        self.extrastart = self.offset
        self.size = self.size + len(data)

    def _read_eof(self):
        """Verify the member trailer and skip any zero padding after it.

        Raises IOError when the stored CRC or length does not match the
        values computed while reading.
        """
        # We've read to the end of the file, so we have to rewind in order
        # to reread the 8 bytes containing the CRC and the file size.
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        self.fileobj.seek(-8, 1)
        crc32 = read32(self.fileobj)
        isize = read32(self.fileobj)  # may exceed 2GB
        if crc32 != self.crc:
            raise IOError("CRC check failed %s != %s" % (hex(crc32),
                                                         hex(self.crc)))
        elif isize != (self.size & 0xffffffff):
            raise IOError("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = "\x00"
        while c == "\x00":
            c = self.fileobj.read(1)
        if c:
            self.fileobj.seek(-1, 1)

    @property
    def closed(self):
        """True once close() has dropped the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release the underlying file object.

        In write mode the compressor is flushed and the CRC and size
        trailer is written.  A file opened by this object is closed even
        if that fails; a caller-supplied fileobj is left open.  Calling
        close() again is a no-op.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2GB, or even 4GB
                write32u(fileobj, self.size & 0xffffffff)
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self, zlib_mode=zlib.Z_SYNC_FLUSH):
        """In write mode, flush the compressor with ``zlib_mode`` and then
        flush the underlying file object.  Does nothing in read mode.
        Raises ValueError on a closed object."""
        self._check_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise IOError("Can't rewind in write mode")
        self.fileobj.seek(0)
        self._new_member = True
        self.extrabuf = ""
        self.extrasize = 0
        self.extrastart = 0
        self.offset = 0

    def readable(self):
        """Return True when the object was opened for reading."""
        return self.mode == READ

    def writable(self):
        """Return True when the object was opened for writing."""
        return self.mode == WRITE

    def seekable(self):
        """Always returns True; see seek() for the supported operations."""
        return True

    def seek(self, offset, whence=0):
        """Move to ``offset`` in the uncompressed stream and return the
        new position.

        ``whence`` may be 0 (absolute) or 1 (relative to the current
        position); any other value raises ValueError.  In write mode only
        forward seeks are possible and the gap is filled with zero bytes;
        a backward seek raises IOError.  In read mode a backward seek
        rewinds and re-reads the stream from the start.
        """
        if whence:
            if whence == 1:
                offset = self.offset + offset
            else:
                raise ValueError('Seek from end not supported')
        if self.mode == WRITE:
            if offset < self.offset:
                raise IOError('Negative seek in write mode')
            count = offset - self.offset
            for _ in xrange(count // 1024):
                self.write(1024 * '\0')
            self.write((count % 1024) * '\0')
        elif self.mode == READ:
            if offset < self.offset:
                # for negative seek, rewind and do positive seek
                self.rewind()
            count = offset - self.offset
            for _ in xrange(count // 1024):
                self.read(1024)
            self.read(count % 1024)

        return self.offset

    def readline(self, size=-1):
        """Read and return one line of uncompressed data.

        With a non-negative ``size`` at most that many bytes are
        returned.  An empty string is returned at end of stream.
        """
        if size < 0:
            # Shortcut common case - newline found in buffer.
            offset = self.offset - self.extrastart
            i = self.extrabuf.find('\n', offset) + 1
            if i > 0:
                self.extrasize -= i - offset
                self.offset += i - offset
                return self.extrabuf[offset: i]

            size = sys.maxint
            readsize = self.min_readsize
        else:
            readsize = size
        bufs = []
        while size != 0:
            c = self.read(readsize)
            i = c.find('\n')

            # We set i=size to break out of the loop under two
            # conditions: 1) there's no newline, and the chunk is
            # larger than size, or 2) there is a newline, but the
            # resulting line would be longer than 'size'.
            if (size <= i) or (i == -1 and len(c) > size):
                i = size - 1

            if i >= 0 or c == '':
                bufs.append(c[:i + 1])    # Add portion of last chunk
                self._unread(c[i + 1:])   # Push back rest of chunk
                break

            # Append chunk to list, decrease 'size',
            bufs.append(c)
            size = size - len(c)
            readsize = min(size, readsize * 2)
        # Remember a larger starting read size for later calls, up to 512.
        if readsize > self.min_readsize:
            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
        return ''.join(bufs)  # Return resulting line


def _test():

[486] Fix | Delete

# Act like gzip; with -d, act like gunzip.

[487] Fix | Delete

# The input file is not deleted, however, nor are any other gzip

[488] Fix | Delete

# options or features supported.

[489] Fix | Delete

args = sys.argv[1:]

[490] Fix | Delete

decompress = args and args[0] == "-d"

[491] Fix | Delete

if decompress:

[492] Fix | Delete

args = args[1:]

[493] Fix | Delete

if not args:

[494] Fix | Delete

args = ["-"]

[495] Fix | Delete

for arg in args:

[496] Fix | Delete

if decompress:

[497] Fix | Delete

if arg == "-":

[498] Fix | Delete

f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)

[499] Fix | Delete