Edit File by line
/home/barbar84/public_h.../wp-conte.../plugins/sujqvwi/ShExBy/shex_roo.../usr/lib64/python2....
File: codecs.py
""" codecs -- Python Codec Registry, API and helpers.
[0] Fix | Delete
[1] Fix | Delete
[2] Fix | Delete
Written by Marc-Andre Lemburg (mal@lemburg.com).
[3] Fix | Delete
[4] Fix | Delete
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
[5] Fix | Delete
[6] Fix | Delete
"""#"
[7] Fix | Delete
[8] Fix | Delete
import __builtin__, sys
[9] Fix | Delete
[10] Fix | Delete
### Registry and builtin stateless codec functions
[11] Fix | Delete
[12] Fix | Delete
# Import the registry and the builtin stateless codec functions from the
# _codecs module; a failed import is re-raised as SystemError.
try:
    from _codecs import *
except ImportError as why:
    # "as" is accepted by Python 2.6+ and 3.x; the legacy comma form
    # ("except ImportError, why") is a syntax error on Python 3.
    raise SystemError('Failed to load the builtin codecs: %s' % why)
[16] Fix | Delete
[17] Fix | Delete
# Public names of this module, in their original order.
__all__ = [
    # registry access and file helpers
    "register", "lookup", "open", "EncodedFile",
    # byte order marks
    "BOM", "BOM_BE", "BOM_LE",
    "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
    "BOM_UTF8",
    "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
    "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
    # codec base classes
    "CodecInfo", "Codec",
    "IncrementalEncoder", "IncrementalDecoder",
    "StreamReader", "StreamWriter",
    "StreamReaderWriter", "StreamRecoder",
    # lookup helpers
    "getencoder", "getdecoder",
    "getincrementalencoder", "getincrementaldecoder",
    "getreader", "getwriter",
    # one-shot and iterative conversion helpers
    "encode", "decode", "iterencode", "iterdecode",
    # error handlers
    "strict_errors", "ignore_errors", "replace_errors",
    "xmlcharrefreplace_errors", "backslashreplace_errors",
    "register_error", "lookup_error",
]
[30] Fix | Delete
[31] Fix | Delete
### Constants
[32] Fix | Delete
[33] Fix | Delete
#
[34] Fix | Delete
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF)
[35] Fix | Delete
# and its possible byte string values
[36] Fix | Delete
# for UTF8/UTF16/UTF32 output and little/big endian machines
[37] Fix | Delete
#
[38] Fix | Delete
[39] Fix | Delete
# UTF-8
[40] Fix | Delete
BOM_UTF8 = '\xef\xbb\xbf'

# UTF-16: little endian, then big endian, each with its short alias
BOM_UTF16_LE = '\xff\xfe'
BOM_LE = BOM_UTF16_LE
BOM_UTF16_BE = '\xfe\xff'
BOM_BE = BOM_UTF16_BE

# UTF-32: little endian, then big endian
BOM_UTF32_LE = '\xff\xfe\x00\x00'
BOM_UTF32_BE = '\x00\x00\xfe\xff'

# Native-endianness aliases, selected from the byte order reported by sys
if sys.byteorder == 'little':
    BOM_UTF16 = BOM_UTF16_LE
    BOM_UTF32 = BOM_UTF32_LE
else:
    BOM_UTF16 = BOM_UTF16_BE
    BOM_UTF32 = BOM_UTF32_BE

# BOM is the native-endian UTF-16 mark
BOM = BOM_UTF16

# Old broken names (don't use in new code): the "32" names hold the
# UTF-16 marks and the "64" names hold the UTF-32 marks.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
[75] Fix | Delete
[76] Fix | Delete
[77] Fix | Delete
### Codec base classes (defining the API)
[78] Fix | Delete
[79] Fix | Delete
class CodecInfo(tuple):
    """Codec details returned when looking up the codec registry.

    The object is the 4-tuple (encode, decode, streamreader, streamwriter)
    and also carries those four entries, the incremental encoder/decoder
    and the codec name as instance attributes.
    """

    # Private API that lets Python blacklist the known non-Unicode codecs
    # in the standard library.  A more general mechanism to reliably
    # distinguish text encodings from other codecs will hopefully be
    # defined for Python 3.5.
    #
    # See http://bugs.python.org/issue19619
    _is_text_encoding = True  # codecs are assumed to be text encodings

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None,
                _is_text_encoding=None):
        """Build the tuple and attach the codec components as attributes."""
        info = tuple.__new__(
            cls, (encode, decode, streamreader, streamwriter))
        info.name = name
        info.encode = encode
        info.decode = decode
        info.incrementalencoder = incrementalencoder
        info.incrementaldecoder = incrementaldecoder
        info.streamwriter = streamwriter
        info.streamreader = streamreader
        # Shadow the class-level default only when a value was passed in.
        if _is_text_encoding is not None:
            info._is_text_encoding = _is_text_encoding
        return info

    def __repr__(self):
        """Return a description naming the class, encoding and object id."""
        klass = self.__class__
        details = (klass.__module__, klass.__name__, self.name, id(self))
        return "<%s.%s object for encoding %s at 0x%x>" % details
[107] Fix | Delete
[108] Fix | Delete
class Codec:

    """ Interface definition for stateless encoders/decoders.

        Both .encode() and .decode() take an errors argument that
        selects the error handling scheme.  The predefined string
        values are:

         'strict' - raise a ValueError (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'xmlcharrefreplace' - replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - replace with backslashed escape sequences
                               (only for encoding).

        Further values can be made available via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encode the object input and return a tuple
            (output object, length consumed).

            errors selects the error handling to apply and defaults
            to 'strict'.

            No state may be kept in the Codec instance; codecs that
            need state to encode efficiently should use StreamWriter.

            Zero length input must be accepted; in that case an empty
            object of the output object type is returned.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decode the object input and return a tuple
            (output object, length consumed).

            input must be an object providing the bf_getreadbuf
            buffer slot; Python strings, buffer objects and memory
            mapped files are examples of such objects.

            errors selects the error handling to apply and defaults
            to 'strict'.

            No state may be kept in the Codec instance; codecs that
            need state to decode efficiently should use StreamReader.

            Zero length input must be accepted; in that case an empty
            object of the output object type is returned.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError
[171] Fix | Delete
[172] Fix | Delete
class IncrementalEncoder(object):
    """
    Encoder that processes its input in several steps.

    The input is handed piece by piece to encode(); the encoder keeps
    the state of the encoding process between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalEncoder instance.

        errors selects the error handling scheme; the predefined values
        are listed in the Codec class docstring.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encode input and return the resulting object.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the encoder to the initial state (a no-op in this base class).
        """

    def getstate(self):
        """
        Return the current state of the encoder (always 0 in this base class).
        """
        return 0

    def setstate(self, state):
        """
        Set the current state of the encoder.

        state must have been returned by getstate().  This base class
        ignores it.
        """
[211] Fix | Delete
[212] Fix | Delete
class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    Base class for incremental encoders that have to hold back part of
    their input in a buffer between calls to encode().
    """
    def __init__(self, errors='strict'):
        """Initialise the base encoder and start with an empty buffer."""
        IncrementalEncoder.__init__(self, errors)
        # input not yet encoded, carried over to the next encode() call
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        """
        Hook for subclasses: encode input and return an
        (output, length consumed) tuple.
        """
        raise NotImplementedError

    def encode(self, input, final=False):
        """Encode the buffered leftovers plus input; return the output."""
        pending = self.buffer + input
        output, used = self._buffer_encode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        """Reset the base encoder and discard buffered input."""
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """Return the buffered input, or 0 when the buffer is empty."""
        if self.buffer:
            return self.buffer
        return 0

    def setstate(self, state):
        """Restore the buffer from state; a false state means empty."""
        if state:
            self.buffer = state
        else:
            self.buffer = ""
[244] Fix | Delete
[245] Fix | Delete
class IncrementalDecoder(object):
    """
    Decoder that processes its input in several steps.

    The input is handed piece by piece to decode(); the decoder keeps
    the state of the decoding process between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalDecoder instance.

        errors selects the error handling scheme; the predefined values
        are listed in the Codec class docstring.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decode input and return the resulting object.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Reset the decoder to the initial state (a no-op in this base class).
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        The state is a (buffered_input, additional_state_info) tuple.
        buffered_input must be a bytes object holding the bytes that
        were passed to decode() but have not been converted yet.
        additional_state_info must be a non-negative integer describing
        the decoder state WITHOUT the contents of buffered_input having
        been processed.  In the initial state and after reset(),
        getstate() must return (b"", 0), which is what this base class
        always returns.
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Set the current state of the decoder.

        state must have been returned by getstate(), and
        setstate((b"", 0)) must have the same effect as reset().  This
        base class ignores state.
        """
[293] Fix | Delete
[294] Fix | Delete
class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    Base class for incremental decoders that must be able to cope with
    incomplete byte sequences.
    """
    def __init__(self, errors='strict'):
        """Initialise the base decoder and start with an empty buffer."""
        IncrementalDecoder.__init__(self, errors)
        # input not yet decoded, carried over to the next decode() call
        self.buffer = ""

    def _buffer_decode(self, input, errors, final):
        """
        Hook for subclasses: decode input and return an
        (output, length consumed) tuple.
        """
        raise NotImplementedError

    def decode(self, input, final=False):
        """Decode the buffered leftovers plus input; return the output."""
        pending = self.buffer + input
        output, used = self._buffer_decode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return output

    def reset(self):
        """Reset the base decoder and discard buffered input."""
        IncrementalDecoder.reset(self)
        self.buffer = ""

    def getstate(self):
        """Return (buffer, 0); the additional state info is always 0."""
        return (self.buffer, 0)

    def setstate(self, state):
        """Restore the buffer from state[0]; additional info is ignored."""
        self.buffer = state[0]
[328] Fix | Delete
[329] Fix | Delete
#
[330] Fix | Delete
# The StreamWriter and StreamReader classes provide generic working
[331] Fix | Delete
# interfaces which can be used to implement new encoding submodules
[332] Fix | Delete
# very easily. See encodings/utf_8.py for an example on how this is
[333] Fix | Delete
# done.
[334] Fix | Delete
#
[335] Fix | Delete
[336] Fix | Delete
class StreamWriter(Codec):

    def __init__(self, stream, errors='strict'):

        """ Create a StreamWriter instance.

            stream must be a file-like object open for writing
            (binary) data.

            errors selects the error handling scheme.  The predefined
            values are:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - replace with backslashed escape
                                   sequences (only for encoding).

            Further values can be made available via register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Encode object with self.encode() and write the result to
            self.stream.
        """
        # the consumed length reported by the encoder is not used here
        encoded, _consumed = self.encode(object, self.errors)
        self.stream.write(encoded)

    def writelines(self, list):

        """ Join the given strings and write the result to the stream
            using .write().
        """
        joined = ''.join(list)
        self.write(joined)

    def reset(self):

        """ Flush and reset the codec buffers used for keeping state.

            Calling this method should leave the output in a clean
            state, so that fresh data can be appended without having
            to rescan the whole stream to recover state.

            This base implementation does nothing.

        """
        pass

    def seek(self, offset, whence=0):
        """ Seek the underlying stream; an absolute seek to position 0
            also resets the writer.
        """
        self.stream.seek(offset, whence)
        rewinding = (whence == 0 and offset == 0)
        if rewinding:
            self.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        """ Return the writer itself as the context value. """
        return self

    def __exit__(self, type, value, tb):
        """ Close the underlying stream when the with-block ends. """
        self.stream.close()
[406] Fix | Delete
[407] Fix | Delete
###
[408] Fix | Delete
[409] Fix | Delete
class StreamReader(Codec):
[410] Fix | Delete
[411] Fix | Delete
def __init__(self, stream, errors='strict'):
[412] Fix | Delete
[413] Fix | Delete
""" Creates a StreamReader instance.
[414] Fix | Delete
[415] Fix | Delete
stream must be a file-like object open for reading
[416] Fix | Delete
(binary) data.
[417] Fix | Delete
[418] Fix | Delete
The StreamReader may use different error handling
[419] Fix | Delete
schemes by providing the errors keyword argument. These
[420] Fix | Delete
parameters are predefined:
[421] Fix | Delete
[422] Fix | Delete
'strict' - raise a ValueError (or a subclass)
[423] Fix | Delete
'ignore' - ignore the character and continue with the next
[424] Fix | Delete
'replace'- replace with a suitable replacement character;
[425] Fix | Delete
[426] Fix | Delete
The set of allowed parameter values can be extended via
[427] Fix | Delete
register_error.
[428] Fix | Delete
"""
[429] Fix | Delete
self.stream = stream
[430] Fix | Delete
self.errors = errors
[431] Fix | Delete
self.bytebuffer = ""
[432] Fix | Delete
# For str->str decoding this will stay a str
[433] Fix | Delete
# For str->unicode decoding the first read will promote it to unicode
[434] Fix | Delete
self.charbuffer = ""
[435] Fix | Delete
self.linebuffer = None
[436] Fix | Delete
[437] Fix | Delete
def decode(self, input, errors='strict'):
[438] Fix | Delete
raise NotImplementedError
[439] Fix | Delete
[440] Fix | Delete
def read(self, size=-1, chars=-1, firstline=False):
[441] Fix | Delete
[442] Fix | Delete
""" Decodes data from the stream self.stream and returns the
[443] Fix | Delete
resulting object.
[444] Fix | Delete
[445] Fix | Delete
chars indicates the number of characters to read from the
[446] Fix | Delete
stream. read() will never return more than chars
[447] Fix | Delete
characters, but it might return less, if there are not enough
[448] Fix | Delete
characters available.
[449] Fix | Delete
[450] Fix | Delete
size indicates the approximate maximum number of bytes to
[451] Fix | Delete
read from the stream for decoding purposes. The decoder
[452] Fix | Delete
can modify this setting as appropriate. The default value
[453] Fix | Delete
-1 indicates to read and decode as much as possible. size
[454] Fix | Delete
is intended to prevent having to decode huge files in one
[455] Fix | Delete
step.
[456] Fix | Delete
[457] Fix | Delete
If firstline is true, and a UnicodeDecodeError happens
[458] Fix | Delete
after the first line terminator in the input only the first line
[459] Fix | Delete
will be returned, the rest of the input will be kept until the
[460] Fix | Delete
next call to read().
[461] Fix | Delete
[462] Fix | Delete
The method should use a greedy read strategy meaning that
[463] Fix | Delete
it should read as much data as is allowed within the
[464] Fix | Delete
definition of the encoding and the given size, e.g. if
[465] Fix | Delete
optional encoding endings or state markers are available
[466] Fix | Delete
on the stream, these should be read too.
[467] Fix | Delete
"""
[468] Fix | Delete
# If we have lines cached, first merge them back into characters
[469] Fix | Delete
if self.linebuffer:
[470] Fix | Delete
self.charbuffer = "".join(self.linebuffer)
[471] Fix | Delete
self.linebuffer = None
[472] Fix | Delete
[473] Fix | Delete
if chars < 0:
[474] Fix | Delete
# For compatibility with other read() methods that take a
[475] Fix | Delete
# single argument
[476] Fix | Delete
chars = size
[477] Fix | Delete
[478] Fix | Delete
# read until we get the required number of characters (if available)
[479] Fix | Delete
while True:
[480] Fix | Delete
# can the request be satisfied from the character buffer?
[481] Fix | Delete
if chars >= 0:
[482] Fix | Delete
if len(self.charbuffer) >= chars:
[483] Fix | Delete
break
[484] Fix | Delete
# we need more data
[485] Fix | Delete
if size < 0:
[486] Fix | Delete
newdata = self.stream.read()
[487] Fix | Delete
else:
[488] Fix | Delete
newdata = self.stream.read(size)
[489] Fix | Delete
# decode bytes (those remaining from the last call included)
[490] Fix | Delete
data = self.bytebuffer + newdata
[491] Fix | Delete
try:
[492] Fix | Delete
newchars, decodedbytes = self.decode(data, self.errors)
[493] Fix | Delete
except UnicodeDecodeError, exc:
[494] Fix | Delete
if firstline:
[495] Fix | Delete
newchars, decodedbytes = self.decode(data[:exc.start], self.errors)
[496] Fix | Delete
lines = newchars.splitlines(True)
[497] Fix | Delete
if len(lines)<=1:
[498] Fix | Delete
raise
[499] Fix | Delete
It is recommended that you Edit text format, this type of Fix handles quite a lot in one request
Function