Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix bug pickling namedtuple. #113

Merged
merged 6 commits into from
Oct 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 51 additions & 15 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,32 @@ def _builtin_type(name):
return getattr(types, name)


def _make__new__factory(type_):
def _factory():
return type_.__new__
return _factory


# NOTE: These need to be module globals so that they're pickleable as globals.
_get_dict_new = _make__new__factory(dict)
_get_frozenset_new = _make__new__factory(frozenset)
_get_list_new = _make__new__factory(list)
_get_set_new = _make__new__factory(set)
_get_tuple_new = _make__new__factory(tuple)
_get_object_new = _make__new__factory(object)

# Pre-defined set of builtin_function_or_method instances that can be
# serialized.
_BUILTIN_TYPE_CONSTRUCTORS = {
dict.__new__: _get_dict_new,
frozenset.__new__: _get_frozenset_new,
set.__new__: _get_set_new,
list.__new__: _get_list_new,
tuple.__new__: _get_tuple_new,
object.__new__: _get_object_new,
}


if sys.version_info < (3, 4):
def _walk_global_ops(code):
"""
Expand Down Expand Up @@ -306,6 +332,18 @@ def save_function(self, obj, name=None):
Determines what kind of function obj is (e.g. lambda, defined at
interactive prompt, etc) and handles the pickling appropriately.
"""
if obj in _BUILTIN_TYPE_CONSTRUCTORS:
# We keep a special-cased cache of built-in type constructors at
# global scope, because these functions are structured very
# differently in different python versions and implementations (for
# example, they're instances of types.BuiltinFunctionType in
# CPython, but they're ordinary types.FunctionType instances in
# PyPy).
#
# If the function we've received is in that cache, we just
# serialize it as a lookup into the cache.
return self.save_reduce(_BUILTIN_TYPE_CONSTRUCTORS[obj], (), obj=obj)

write = self.write

if name is None:
Expand All @@ -332,7 +370,7 @@ def save_function(self, obj, name=None):
return self.save_global(obj, name)

# a builtin_function_or_method which comes in as an attribute of some
# object (e.g., object.__new__, itertools.chain.from_iterable) will end
# object (e.g., itertools.chain.from_iterable) will end
# up with modname "__main__" and so end up here. But these functions
# have no __code__ attribute in CPython, so the handling for
# user-defined functions below will fail.
Expand Down Expand Up @@ -408,15 +446,18 @@ def save_dynamic_class(self, obj):
from global modules.
"""
clsdict = dict(obj.__dict__) # copy dict proxy to a dict
if not isinstance(clsdict.get('__dict__', None), property):
# don't extract dict that are properties
clsdict.pop('__dict__', None)
clsdict.pop('__weakref__', None)
clsdict.pop('__weakref__', None)
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This now unconditionally pops __weakref__, since it's not meaningful to pickle it.


# hack as __new__ is stored differently in the __dict__
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what this was trying to do previously, but it causes unpickling of namedtuples to not work, and the test suite still passes for me without it.

new_override = clsdict.get('__new__', None)
if new_override:
clsdict['__new__'] = obj.__new__
# On PyPy, __doc__ is a readonly attribute, so we need to include it in
# the initial skeleton class. This is safe because we know that the
# doc can't participate in a cycle with the original class.
type_kwargs = {'__doc__': clsdict.pop('__doc__', None)}

# If type overrides __dict__ as a property, include it in the type kwargs.
# In Python 2, we can't set this attribute after construction.
__dict__ = clsdict.pop('__dict__', None)
if isinstance(__dict__, property):
type_kwargs['__dict__'] = __dict__

save = self.save
write = self.write
Expand All @@ -439,17 +480,12 @@ def save_dynamic_class(self, obj):
# Mark the start of the args for the rehydration function.
write(pickle.MARK)

# On PyPy, __doc__ is a readonly attribute, so we need to include it in
# the initial skeleton class. This is safe because we know that the
# doc can't participate in a cycle with the original class.
doc_dict = {'__doc__': clsdict.pop('__doc__', None)}

# Create and memoize an empty class with obj's name and bases.
save(type(obj))
save((
obj.__name__,
obj.__bases__,
doc_dict,
type_kwargs,
))
write(pickle.REDUCE)
self.memoize(obj)
Expand Down
24 changes: 23 additions & 1 deletion tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import division

import abc

import collections
import base64
import functools
import imp
Expand Down Expand Up @@ -701,5 +701,27 @@ def test_function_module_name(self):
func = lambda x: x
self.assertEqual(pickle_depickle(func).__module__, func.__module__)

def test_namedtuple(self):

MyTuple = collections.namedtuple('MyTuple', ['a', 'b', 'c'])
t = MyTuple(1, 2, 3)

depickled_t, depickled_MyTuple = pickle_depickle([t, MyTuple])
self.assertTrue(isinstance(depickled_t, depickled_MyTuple))

self.assertEqual((depickled_t.a, depickled_t.b, depickled_t.c), (1, 2, 3))
self.assertEqual((depickled_t[0], depickled_t[1], depickled_t[2]), (1, 2, 3))

self.assertEqual(depickled_MyTuple.__name__, 'MyTuple')
self.assertTrue(issubclass(depickled_MyTuple, tuple))

def test_builtin_type__new__(self):
# Functions occasionally take the __new__ of these types as default
# parameters for factories. For example, on Python 3.3,
# `tuple.__new__` is a default value for some methods of namedtuple.
for t in list, tuple, set, frozenset, dict, object:
self.assertTrue(pickle_depickle(t.__new__) is t.__new__)


if __name__ == '__main__':
unittest.main()