def memoize(self, obj):
    """Store an object in the memo."""

    # The use of the Unpickler memo length as the memo key is just a
    # convention.  The only requirement is that the memo values be unique.
    # But there appears no advantage to any other scheme, and this
    # scheme allows the Unpickler memo to be implemented as a plain (but
    # growable) array, indexed by memo key.
    assert id(obj) not in self.memo
    idx = len(self.memo)
    self.write(self.put(idx))
    self.memo[id(obj)] = idx, obj
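# Illustrative sketch (not part of the class): the effect of the memo
# can be observed with the standard pickletools module.  A repeated
# reference is written once and then fetched by memo key:
#
#     import pickle, pickletools
#     x = [1, 2, 3]
#     pickletools.dis(pickle.dumps([x, x], protocol=2))
#
# The disassembly should show a BINPUT for the first occurrence of x
# and a BINGET (instead of a second full copy) for the second one.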
# Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
return BINPUT + pack("<B", idx)
return LONG_BINPUT + pack("<I", idx)
return PUT + repr(idx).encode("ascii") + b'\n'
def get(self, i):
    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    if self.bin:
        if i < 256:
            return BINGET + pack("<B", i)
        return LONG_BINGET + pack("<I", i)
    return GET + repr(i).encode("ascii") + b'\n'
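# Illustrative sketch: for binary protocols the memo opcodes are a
# single opcode byte plus a packed index.  Assuming the opcode
# constants defined at the top of this module:
#
#     >>> BINPUT + pack("<B", 5)
#     b'q\x05'
#     >>> BINGET + pack("<B", 5)
#     b'h\x05'
#
# Text protocol 0 instead writes the index in ASCII, e.g. b'p5\n'.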
def save(self, obj, save_persistent_id=True):
    self.framer.commit_frame()

    # Check for persistent id (defined by a subclass)
    pid = self.persistent_id(obj)
    if pid is not None and save_persistent_id:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x is not None:
        self.write(self.get(x[0]))
        return
reduce = getattr(self, "reducer_override", None)
# Check the type dispatch table
f(self, obj) # Call unbound method with explicit self
# Check private dispatch table if any, or else
reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
# Check for a class with a custom metaclass; treat as regular
# Check for a __reduce_ex__ method, fall back to __reduce__
reduce = getattr(obj, "__reduce_ex__", None)
reduce = getattr(obj, "__reduce__", None)
raise PicklingError("Can't pickle %r object: %r" %
# Check for string returned by reduce(), meaning "save as global"
self.save_global(obj, rv)
# Assert that reduce() returned a tuple
if not isinstance(rv, tuple):
raise PicklingError("%s must return string or tuple" % reduce)
# Assert that it returned an appropriately sized tuple
raise PicklingError("Tuple returned by %s must have "
"two to six elements" % reduce)
# Save the reduce() output and finally memoize the object
self.save_reduce(obj=obj, *rv)
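# Illustrative sketch: a minimal class whose __reduce__ feeds this
# machinery with a two-tuple (callable, args).  Point is a made-up
# example class, not part of this module:
#
#     class Point:
#         def __init__(self, x, y):
#             self.x, self.y = x, y
#         def __reduce__(self):
#             # rv = (callable, args); save() hands it to save_reduce()
#             return (Point, (self.x, self.y))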
def persistent_id(self, obj):
    # This exists so a subclass can override it
    return None
def save_pers(self, pid):
    # Save a persistent id reference
    if self.bin:
        self.save(pid, save_persistent_id=False)
        self.write(BINPERSID)
    else:
        try:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
def save_reduce(self, func, args, state=None, listitems=None,
                dictitems=None, state_setter=None, obj=None):
    # This API is called by some subclasses

    if not isinstance(args, tuple):
        raise PicklingError("args from save_reduce() must be a tuple")
    if not callable(func):
        raise PicklingError("func from save_reduce() must be callable")

    save = self.save
    write = self.write

    func_name = getattr(func, "__name__", "")
    if self.proto >= 2 and func_name == "__newobj_ex__":
        cls, args, kwargs = args
        if not hasattr(cls, "__new__"):
            raise PicklingError("args[0] from {} args has no __new__"
                                .format(func_name))
        if obj is not None and cls is not obj.__class__:
            raise PicklingError("args[0] from {} args has the wrong class"
                                .format(func_name))
        if self.proto >= 4:
            save(cls)
            save(args)
            save(kwargs)
            write(NEWOBJ_EX)
        else:
            # Protocols 2 and 3 lack NEWOBJ_EX; emulate it by reducing
            # to a partial application of cls.__new__.
            func = partial(cls.__new__, cls, *args, **kwargs)
            save(func)
            save(())
            write(REDUCE)
    elif self.proto >= 2 and func_name == "__newobj__":
        # A __reduce__ implementation can direct protocol 2 or newer to
        # use the more efficient NEWOBJ opcode, while still
        # allowing protocol 0 and 1 to work normally.  For this to
        # work, the function returned by __reduce__ should be
        # called __newobj__, and its first argument should be a
        # class.  The implementation for __newobj__
        # should be as follows, although pickle has no way to
        # verify this:
        #
        #     def __newobj__(cls, *args):
        #         return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 will pickle a reference to __newobj__,
        # while protocol 2 (and above) will pickle a reference to
        # cls, the remaining args tuple, and the NEWOBJ code,
        # which calls cls.__new__(cls, *args) at unpickling time
        # (see load_newobj below).  If __reduce__ returns a
        # three-tuple, the state from the third tuple item will be
        # pickled regardless of the protocol, calling __setstate__
        # at unpickling time (see load_build below).
        #
        # Note that no standard __newobj__ implementation exists;
        # you have to provide your own.  This is to enforce
        # compatibility with Python 2.2 (pickles written using
        # protocol 0 or 1 in Python 2.3 should be unpicklable by
        # Python 2.2).
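        #
        # Illustrative sketch: the default object.__reduce_ex__(2)
        # already follows this convention via copyreg.__newobj__:
        #
        #     class C: pass
        #     rv = C().__reduce_ex__(2)
        #     rv[0].__name__     # '__newobj__', the name checked above
        #     rv[1][0] is C      # True: first argument is the class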
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)
    if obj is not None:
        # If the object is already in the memo, this means it is
        # recursive.  In this case, throw away everything we put on the
        # stack, and fetch the object back from the memo.
        if id(obj) in self.memo:
            write(POP + self.get(self.memo[id(obj)][0]))
        else:
            self.memoize(obj)
    # More new special cases (that work with older protocols as
    # well): when __reduce__ returns a tuple with 4 or 5 items,
    # the 4th and 5th item should be iterators that provide list
    # items and dict items (as (key, value) tuples), or None.

    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)
    if state is not None:
        if state_setter is None:
            save(state)
            write(BUILD)
        else:
            # If a state_setter is specified, call it instead of load_build
            # to update obj with its previous state.
            # First, push state_setter and its tuple of expected arguments
            # (obj, state) onto the stack.
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(REDUCE)
            # The purpose of state_setter is to carry out an in-place
            # modification of obj.  We do not care about what the call
            # might return, so its output is popped off the stack.
            write(POP)
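# Illustrative sketch: a six-element reduce tuple supplies the
# state_setter (Python 3.8+).  Both names below (Gauge,
# _restore_state) are made up for the example:
#
#     def _restore_state(obj, state):
#         obj.__dict__.update(state)   # in-place; return value unused
#
#     class Gauge:
#         def __reduce__(self):
#             return (Gauge, (), self.__dict__.copy(),
#                     None, None, _restore_state)
#
# At unpickling time _restore_state(obj, state) is called in place of
# the default load_build/__setstate__ path.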
# Methods below this point are dispatched through the dispatch table
def save_none(self, obj):
    self.write(NONE)
dispatch[type(None)] = save_none
def save_bool(self, obj):
    if self.proto >= 2:
        self.write(NEWTRUE if obj else NEWFALSE)
    else:
        self.write(TRUE if obj else FALSE)
dispatch[bool] = save_bool
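# Illustrative sketch: the protocol split is visible in the output.
# Protocol 2+ uses the one-byte NEWTRUE/NEWFALSE opcodes, protocol 0
# the text form:
#
#     >>> pickle.dumps(True, protocol=2)
#     b'\x80\x02\x88.'          # PROTO 2, NEWTRUE, STOP
#     >>> pickle.dumps(True, protocol=0)
#     b'I01\n.'                 # INT with the special "01" argument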
def save_long(self, obj):
    if self.bin:
        # If the int is small enough to fit in a signed 4-byte 2's-comp
        # format, we can store it more efficiently than the general
        # case.
        # First one- and two-byte unsigned ints:
        if obj >= 0:
            if obj <= 0xff:
                self.write(BININT1 + pack("<B", obj))
                return
            if obj <= 0xffff:
                self.write(BININT2 + pack("<H", obj))
                return
        # Next check for 4-byte signed ints:
        if -0x80000000 <= obj <= 0x7fffffff:
            self.write(BININT + pack("<i", obj))
            return
    if self.proto >= 2:
        encoded = encode_long(obj)
        n = len(encoded)
        if n < 256:
            self.write(LONG1 + pack("<B", n) + encoded)
        else:
            self.write(LONG4 + pack("<i", n) + encoded)
        return
    if -0x80000000 <= obj <= 0x7fffffff:
        self.write(INT + repr(obj).encode("ascii") + b'\n')
    else:
        self.write(LONG + repr(obj).encode("ascii") + b'L\n')
dispatch[int] = save_long
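# Illustrative sketch: the size ladder above is easy to observe.
# Small non-negative ints take a one-byte payload, and arbitrary
# precision ints fall through to the LONG1/LONG4 encoding:
#
#     >>> pickle.dumps(255, protocol=2)
#     b'\x80\x02K\xff.'                    # BININT1
#     >>> import pickletools
#     >>> pickletools.dis(pickle.dumps(2**100, protocol=2))
#     ...                                  # should show a LONG1 opcode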
def save_float(self, obj):
    if self.bin:
        self.write(BINFLOAT + pack('>d', obj))
    else:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
dispatch[float] = save_float
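# Illustrative sketch: BINFLOAT is simply a big-endian IEEE 754 double:
#
#     >>> pickle.dumps(1.5, protocol=1)
#     b'G?\xf8\x00\x00\x00\x00\x00\x00.'   # b'G' + pack('>d', 1.5) + STOP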
def save_bytes(self, obj):
    if self.proto < 3:
        if not obj:  # bytes object is empty
            self.save_reduce(bytes, (), obj=obj)
        else:
            self.save_reduce(codecs.encode,
                             (str(obj, 'latin1'), 'latin1'), obj=obj)
        return
    n = len(obj)
    if n <= 0xff:
        self.write(SHORT_BINBYTES + pack("<B", n) + obj)
    elif n > 0xffffffff and self.proto >= 4:
        self._write_large_bytes(BINBYTES8 + pack("<Q", n), obj)
    elif n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BINBYTES + pack("<I", n), obj)
    else:
        self.write(BINBYTES + pack("<I", n) + obj)
    self.memoize(obj)
dispatch[bytes] = save_bytes
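# Illustrative sketch: protocols 0-2 predate the bytes opcodes, so a
# non-empty bytes object is reduced to a codecs.encode() call on a
# latin-1 str, which round-trips every byte value 0-255:
#
#     import pickletools
#     pickletools.dis(pickle.dumps(b'\xff\x00', protocol=2))
#     # should show GLOBAL '_codecs encode' plus the latin-1 string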
def save_bytearray(self, obj):
    if self.proto < 5:
        if not obj:  # bytearray is empty
            self.save_reduce(bytearray, (), obj=obj)
        else:
            self.save_reduce(bytearray, (bytes(obj),), obj=obj)
        return
    n = len(obj)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
    else:
        self.write(BYTEARRAY8 + pack("<Q", n) + obj)
dispatch[bytearray] = save_bytearray
def save_picklebuffer(self, obj):
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)
dispatch[PickleBuffer] = save_picklebuffer
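# Illustrative sketch: the buffer_callback/buffers pair is the
# documented out-of-band API from PEP 574:
#
#     buffers = []
#     data = pickle.dumps(obj, protocol=5,
#                         buffer_callback=buffers.append)
#     obj2 = pickle.loads(data, buffers=buffers)
#
# buffers.append returns None (a false value), so each PickleBuffer
# goes out-of-band; a callback returning a true value keeps the
# buffer in-band as above.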
def save_str(self, obj):
    if self.bin:
        encoded = obj.encode('utf-8', 'surrogatepass')
        n = len(encoded)
        if n <= 0xff and self.proto >= 4:
            self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
        else:
            self.write(BINUNICODE + pack("<I", n) + encoded)
    else:
        # Escape what raw-unicode-escape can't represent unambiguously
        obj = obj.replace("\\", "\\u005c")
        obj = obj.replace("\0", "\\u0000")
        obj = obj.replace("\n", "\\u000a")
        obj = obj.replace("\r", "\\u000d")
        obj = obj.replace("\x1a", "\\u001a")  # EOF on DOS
        self.write(UNICODE + obj.encode('raw-unicode-escape') +
                   b'\n')
    self.memoize(obj)
dispatch[str] = save_str
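# Illustrative sketch: the protocol 0 escapes keep the newline-framed
# text format parseable:
#
#     >>> pickle.dumps("a\nb", protocol=0)
#     b'Va\\u000ab\np0\n.'      # UNICODE, escaped \n, PUT, STOP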
def save_tuple(self, obj):
    if not obj:  # tuple is empty
        if self.bin:
            self.write(EMPTY_TUPLE)
        else:
            self.write(MARK + TUPLE)
        return

    n = len(obj)
    save = self.save
    memo = self.memo
    if n <= 3 and self.proto >= 2:
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            get = self.get(memo[id(obj)][0])
            self.write(POP * n + get)
        else:
            self.write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write = self.write
    write(MARK)
    for element in obj:
        save(element)

    if id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself: the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if self.bin:
            write(POP_MARK + get)
        else:  # proto 0 -- POP_MARK not available
            write(POP * (n + 1) + get)
        return

    # No recursion.
    write(TUPLE)
    self.memoize(obj)
dispatch[tuple] = save_tuple
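# Illustrative sketch: the recursive-tuple path above can be
# triggered through a list:
#
#     l = []
#     t = (l,)
#     l.append(t)                 # t -> l -> t ...
#     t2 = pickle.loads(pickle.dumps(t, protocol=2))
#     t2[0][0] is t2              # True: one tuple, found via the memo
#
# Saving t saves l, which saves t again; the inner save constructs
# and memoizes the tuple, so the outer copy is POPped and a GET
# fetches the memoized one instead.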
def save_list(self, obj):
    if self.bin:
        self.write(EMPTY_LIST)
    else:  # proto 0 -- can't use EMPTY_LIST
        self.write(MARK + LIST)
    self.memoize(obj)
    self._batch_appends(obj)
dispatch[list] = save_list
def _batch_appends(self, items):
    # Helper to batch up APPENDS sequences
    save = self.save
    write = self.write

    if not self.bin:
        for x in items:
            save(x)
            write(APPEND)
        return

    it = iter(items)
    while True:
        tmp = list(islice(it, self._BATCHSIZE))
        n = len(tmp)
        if n > 1:
            write(MARK)
            for x in tmp:
                save(x)
            write(APPENDS)
        elif n:
            save(tmp[0])
            write(APPEND)
        # else tmp is empty, and we're done
        if n < self._BATCHSIZE:
            return
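# Illustrative sketch: with the module-level _BATCHSIZE of 1000, a
# 1500-element list is written as two batches:
#
#     import pickletools
#     pickletools.dis(pickle.dumps(list(range(1500)), protocol=2))
#     # should show MARK ... APPENDS twice: 1000 items, then 500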
def save_dict(self, obj):
    if self.bin:
        self.write(EMPTY_DICT)
    else:  # proto 0 -- can't use EMPTY_DICT
        self.write(MARK + DICT)
    self.memoize(obj)
    self._batch_setitems(obj.items())
dispatch[dict] = save_dict
if PyStringMap is not None:
    dispatch[PyStringMap] = save_dict
def _batch_setitems(self, items):
    # Helper to batch up SETITEMS sequences; proto >= 1 only
    save = self.save
    write = self.write

    if not self.bin:
        for k, v in items:
            save(k)
            save(v)
            write(SETITEM)
        return

    it = iter(items)
    while True:
        tmp = list(islice(it, self._BATCHSIZE))
        n = len(tmp)
        if n > 1:
            write(MARK)
            for k, v in tmp:
                save(k)
                save(v)
            write(SETITEMS)
        elif n:
            k, v = tmp[0]
            save(k)
            save(v)
            write(SETITEM)
        # else tmp is empty, and we're done
        if n < self._BATCHSIZE:
            return