""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

"""#"

[7] Fix | Delete

[8] Fix | Delete

import __builtin__, sys

[9] Fix | Delete

[10] Fix | Delete

### Registry and builtin stateless codec functions

[11] Fix | Delete

[12] Fix | Delete

# The registry functions and the stateless codec helpers are implemented
# in the builtin _codecs module; without it this module cannot work, so
# a failed import is reported as a SystemError carrying the original reason.
try:
    from _codecs import *
except ImportError as why:
    raise SystemError('Failed to load the builtin codecs: %s' % why)

[16] Fix | Delete

[17] Fix | Delete

# Names exported by "from codecs import *".
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder",
           "StreamReader", "StreamWriter",
           "StreamReaderWriter", "StreamRecoder",
           "getencoder", "getdecoder", "getincrementalencoder",
           "getincrementaldecoder", "getreader", "getwriter",
           "encode", "decode", "iterencode", "iterdecode",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors", "backslashreplace_errors",
           "register_error", "lookup_error"]

[30] Fix | Delete

[31] Fix | Delete

### Constants

[32] Fix | Delete

[33] Fix | Delete

[34] Fix | Delete

# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
# and its possible byte string values
# for UTF8/UTF16/UTF32 output and little/big endian machines
#
# The literals carry an explicit b prefix: they are byte strings, and
# the prefix leaves their value unchanged on Python 2.

# UTF-8
BOM_UTF8 = b'\xef\xbb\xbf'

# UTF-16, little endian
BOM_LE = BOM_UTF16_LE = b'\xff\xfe'

# UTF-16, big endian
BOM_BE = BOM_UTF16_BE = b'\xfe\xff'

# UTF-32, little endian
BOM_UTF32_LE = b'\xff\xfe\x00\x00'

# UTF-32, big endian
BOM_UTF32_BE = b'\x00\x00\xfe\xff'

if sys.byteorder == 'little':

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_LE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_LE

else:

    # UTF-16, native endianness
    BOM = BOM_UTF16 = BOM_UTF16_BE

    # UTF-32, native endianness
    BOM_UTF32 = BOM_UTF32_BE

# Old broken names (don't use in new code): the "32"/"64" suffixes
# actually alias the UTF-16 and UTF-32 marks respectively.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE

[75] Fix | Delete

[76] Fix | Delete

[77] Fix | Delete

### Codec base classes (defining the API)

[78] Fix | Delete

[79] Fix | Delete

class CodecInfo(tuple):
    """Codec details when looking up the codec registry.

    An instance is the 4-tuple (encode, decode, streamreader,
    streamwriter).  The same four objects, together with the
    incremental encoder/decoder factories and the codec name, are also
    available as attributes.
    """

    # Private API to allow Python to blacklist the known non-Unicode
    # codecs in the standard library. A more general mechanism to
    # reliably distinguish text encodings from other codecs will hopefully
    # be defined for Python 3.5
    #
    # See http://bugs.python.org/issue19619
    _is_text_encoding = True # Assume codecs are text encodings by default

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None,
                _is_text_encoding=None):
        """Build the tuple and attach the codec entry points as attributes.

        _is_text_encoding overrides the class-level default only when it
        is not None; otherwise the instance keeps the class attribute.
        """
        self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter))
        self.name = name
        self.encode = encode
        self.decode = decode
        self.incrementalencoder = incrementalencoder
        self.incrementaldecoder = incrementaldecoder
        self.streamwriter = streamwriter
        self.streamreader = streamreader
        if _is_text_encoding is not None:
            self._is_text_encoding = _is_text_encoding
        return self

    def __repr__(self):
        return "<%s.%s object for encoding %s at 0x%x>" % (
            self.__class__.__module__, self.__class__.__name__,
            self.name, id(self))

[107] Fix | Delete

[108] Fix | Delete

class Codec:

    """ Defines the interface for stateless encoders/decoders.

        The .encode()/.decode() methods may use different error
        handling schemes by providing the errors argument. These
        string values are predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences
                               (only for encoding).

        The set of allowed values can be extended via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encodes the object input and returns a tuple (output
            object, length consumed).

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamWriter for codecs which have to keep state in order to
            make encoding efficient.

            The encoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation always raises NotImplementedError;
            subclasses must override it.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decodes the object input and returns a tuple (output
            object, length consumed).

            input must be an object which provides the bf_getreadbuf
            buffer slot. Python strings, buffer objects and memory
            mapped files are examples of objects providing this slot.

            errors defines the error handling to apply. It defaults to
            'strict' handling.

            The method may not store state in the Codec instance. Use
            StreamReader for codecs which have to keep state in order to
            make decoding efficient.

            The decoder must be able to handle zero length input and
            return an empty object of the output object type in this
            situation.

            This base implementation always raises NotImplementedError;
            subclasses must override it.

        """
        raise NotImplementedError

[171] Fix | Delete

[172] Fix | Delete

class IncrementalEncoder(object):
    """
    An IncrementalEncoder encodes an input in multiple steps. The input can be
    passed piece by piece to the encode() method. The IncrementalEncoder remembers
    the state of the Encoding process between calls to encode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalEncoder instance.

        The IncrementalEncoder may use different error handling schemes by
        providing the errors keyword argument. See the module docstring
        for a list of possible values.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encodes input and returns the resulting object.

        This base implementation always raises NotImplementedError;
        subclasses must override it.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the encoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the encoder.

        The base implementation always returns 0.
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder. state must have been
        returned by getstate().

        The base implementation ignores state.
        """

[211] Fix | Delete

[212] Fix | Delete

class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    This subclass of IncrementalEncoder can be used as the baseclass for an
    incremental encoder if the encoder must keep some of the output in a
    buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        self.buffer = "" # unencoded input that is kept between calls to encode()

    def _buffer_encode(self, input, errors, final):
        # Overwrite this method in subclasses: It must encode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        """
        Encodes the buffered leftover plus input via _buffer_encode()
        and returns the encoded result.  Whatever _buffer_encode()
        reports as not consumed is kept in self.buffer for the next call.
        """
        # encode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_encode(data, self.errors, final)
        # keep unencoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the encoder and discards any buffered input.
        """
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """
        Returns the buffered input, or 0 when the buffer is empty.
        """
        return self.buffer or 0

    def setstate(self, state):
        """
        Restores the buffer from state; a false state (such as the 0
        returned by getstate() for an empty buffer) yields an empty buffer.
        """
        self.buffer = state or ""

[244] Fix | Delete

[245] Fix | Delete

class IncrementalDecoder(object):
    """
    An IncrementalDecoder decodes an input in multiple steps. The input can be
    passed piece by piece to the decode() method. The IncrementalDecoder
    remembers the state of the decoding process between calls to decode().
    """
    def __init__(self, errors='strict'):
        """
        Creates an IncrementalDecoder instance.

        The IncrementalDecoder may use different error handling schemes by
        providing the errors keyword argument. See the module docstring
        for a list of possible values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decodes input and returns the resulting object.

        This base implementation always raises NotImplementedError;
        subclasses must override it.
        """
        raise NotImplementedError

    def reset(self):
        """
        Resets the decoder to the initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        This must be a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object containing bytes that
        were passed to decode() that have not yet been converted.
        additional_state_info must be a non-negative integer
        representing the state of the decoder WITHOUT yet having
        processed the contents of buffered_input.  In the initial state
        and after reset(), getstate() must return (b"", 0).
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate().  The effect of
        setstate((b"", 0)) must be equivalent to reset().

        The base implementation ignores state.
        """

[293] Fix | Delete

[294] Fix | Delete

class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    This subclass of IncrementalDecoder can be used as the baseclass for an
    incremental decoder if the decoder must be able to handle incomplete byte
    sequences.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        self.buffer = "" # undecoded input that is kept between calls to decode()

    def _buffer_decode(self, input, errors, final):
        # Overwrite this method in subclasses: It must decode input
        # and return an (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        """
        Decodes the buffered leftover plus input via _buffer_decode()
        and returns the decoded result.  Whatever _buffer_decode()
        reports as not consumed is kept in self.buffer for the next call.
        """
        # decode input (taking the buffer into account)
        data = self.buffer + input
        (result, consumed) = self._buffer_decode(data, self.errors, final)
        # keep undecoded input until the next call
        self.buffer = data[consumed:]
        return result

    def reset(self):
        """
        Resets the decoder and discards any buffered input.
        """
        IncrementalDecoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """
        Returns (buffered input, 0).
        """
        # additional state info is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        """
        Restores the buffer from the first item of state.
        """
        # ignore additional state info
        self.buffer = state[0]

[328] Fix | Delete

[329] Fix | Delete

[330] Fix | Delete

# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example of how this is
# done.

[334] Fix | Delete

[335] Fix | Delete

[336] Fix | Delete

class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamWriter instance.

            stream must be a file-like object open for writing
            (binary) data.

            The StreamWriter may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - Replace with backslashed escape
                                   sequences (only for encoding).

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Writes the object's contents encoded to self.stream.

            The object is encoded with self.encode() using self.errors;
            the consumed count returned by encode() is not used.
        """
        data, consumed = self.encode(object, self.errors)
        self.stream.write(data)

    def writelines(self, list):

        """ Writes the concatenated list of strings to the stream
            using .write().
        """
        self.write(''.join(list))

    def reset(self):

        """ Flushes and resets the codec buffers used for keeping state.

            Calling this method should ensure that the data on the
            output is put into a clean state, that allows appending
            of new fresh data without having to rescan the whole
            stream to recover state.

            The base implementation does nothing.

        """
        pass

    def seek(self, offset, whence=0):

        """ Seeks the underlying stream; seeking to the very start
            (offset 0 with whence 0) also calls reset().
        """
        self.stream.seek(offset, whence)
        if whence == 0 and offset == 0:
            self.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Leaving the with-block closes the underlying stream.
        self.stream.close()

[406] Fix | Delete

[407] Fix | Delete

###

[408] Fix | Delete

[409] Fix | Delete

class StreamReader(Codec):

[410] Fix | Delete

[411] Fix | Delete

def __init__(self, stream, errors='strict'):

[412] Fix | Delete

[413] Fix | Delete

""" Creates a StreamReader instance.

[414] Fix | Delete

[415] Fix | Delete

stream must be a file-like object open for reading

[416] Fix | Delete

(binary) data.

[417] Fix | Delete

[418] Fix | Delete

The StreamReader may use different error handling

[419] Fix | Delete

schemes by providing the errors keyword argument. These

[420] Fix | Delete

parameters are predefined:

[421] Fix | Delete

[422] Fix | Delete

'strict' - raise a ValueError (or a subclass)

[423] Fix | Delete

'ignore' - ignore the character and continue with the next

[424] Fix | Delete

'replace'- replace with a suitable replacement character;

[425] Fix | Delete

[426] Fix | Delete

The set of allowed parameter values can be extended via

[427] Fix | Delete

register_error.

[428] Fix | Delete

"""

[429] Fix | Delete

self.stream = stream

[430] Fix | Delete

self.errors = errors

[431] Fix | Delete

self.bytebuffer = ""

[432] Fix | Delete

# For str->str decoding this will stay a str

[433] Fix | Delete

# For str->unicode decoding the first read will promote it to unicode

[434] Fix | Delete

self.charbuffer = ""

[435] Fix | Delete

self.linebuffer = None

[436] Fix | Delete

[437] Fix | Delete

def decode(self, input, errors='strict'):

[438] Fix | Delete

raise NotImplementedError

[439] Fix | Delete

[440] Fix | Delete

def read(self, size=-1, chars=-1, firstline=False):

[441] Fix | Delete

[442] Fix | Delete

""" Decodes data from the stream self.stream and returns the

[443] Fix | Delete

resulting object.

[444] Fix | Delete

[445] Fix | Delete

chars indicates the number of characters to read from the

[446] Fix | Delete

stream. read() will never return more than chars

[447] Fix | Delete

characters, but it might return less, if there are not enough

[448] Fix | Delete

characters available.

[449] Fix | Delete

[450] Fix | Delete

size indicates the approximate maximum number of bytes to

[451] Fix | Delete

read from the stream for decoding purposes. The decoder

[452] Fix | Delete

can modify this setting as appropriate. The default value

[453] Fix | Delete

-1 indicates to read and decode as much as possible. size

[454] Fix | Delete

is intended to prevent having to decode huge files in one

[455] Fix | Delete

step.

[456] Fix | Delete

[457] Fix | Delete

If firstline is true, and a UnicodeDecodeError happens

[458] Fix | Delete

after the first line terminator in the input only the first line

[459] Fix | Delete

will be returned, the rest of the input will be kept until the

[460] Fix | Delete

next call to read().

[461] Fix | Delete

[462] Fix | Delete

The method should use a greedy read strategy meaning that

[463] Fix | Delete

it should read as much data as is allowed within the

[464] Fix | Delete

definition of the encoding and the given size, e.g. if

[465] Fix | Delete

optional encoding endings or state markers are available

[466] Fix | Delete

on the stream, these should be read too.

[467] Fix | Delete

"""

[468] Fix | Delete

# If we have lines cached, first merge them back into characters

[469] Fix | Delete

if self.linebuffer:

[470] Fix | Delete

self.charbuffer = "".join(self.linebuffer)

[471] Fix | Delete

self.linebuffer = None

[472] Fix | Delete

[473] Fix | Delete

if chars < 0:

[474] Fix | Delete

# For compatibility with other read() methods that take a

[475] Fix | Delete

# single argument

[476] Fix | Delete

chars = size

[477] Fix | Delete

[478] Fix | Delete

# read until we get the required number of characters (if available)

[479] Fix | Delete

while True:

[480] Fix | Delete

# can the request be satisfied from the character buffer?

[481] Fix | Delete

if chars >= 0:

[482] Fix | Delete

if len(self.charbuffer) >= chars:

[483] Fix | Delete

break

[484] Fix | Delete

# we need more data

[485] Fix | Delete

if size < 0:

[486] Fix | Delete

newdata = self.stream.read()

[487] Fix | Delete

else:

[488] Fix | Delete

newdata = self.stream.read(size)

[489] Fix | Delete

# decode bytes (those remaining from the last call included)

[490] Fix | Delete

data = self.bytebuffer + newdata

[491] Fix | Delete

try:

[492] Fix | Delete

newchars, decodedbytes = self.decode(data, self.errors)

[493] Fix | Delete

except UnicodeDecodeError, exc:

[494] Fix | Delete

if firstline:

[495] Fix | Delete

newchars, decodedbytes = self.decode(data[:exc.start], self.errors)

[496] Fix | Delete

lines = newchars.splitlines(True)

[497] Fix | Delete

if len(lines)<=1:

[498] Fix | Delete

raise

[499] Fix | Delete

12 3