Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

backport fix for #2878 (pickle DCD and XDR readers at correct frame) #2908

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ The rules for this file:
* release numbers follow "Semantic Versioning" http://semver.org

------------------------------------------------------------------------------
??/??/20 orbeckst, VOD555, lilyminium
??/??/20 orbeckst, VOD555, lilyminium, yuxuanzhuang

* 1.0.1

Expand All @@ -24,6 +24,7 @@ Fixes
density=True; the keyword was available since 0.19.0 but with incorrect
semantics and not documented and did not produce correct results (Issue
#2811, PR #2812)
* pickle correct frames of DCD, TRR, and XTC (#2878)


06/09/20 richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira,
Expand Down
3 changes: 2 additions & 1 deletion package/MDAnalysis/lib/formats/libdcd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ cdef class DCDFile:
return

current_frame = state[1]
self.seek(current_frame)
self.seek(current_frame - 1)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these lines make the low-level tests fail?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah...it messed up with the position of the file.

The fix is ditching current changes, and changing line 410 to:

        if frame >= self.n_frames + 1:

Besides, I don't think current tests cover for the "true" last frame i.e.:

        traj = DCDReader(DCD)
        traj[-1]  #  move to last frame
        dcd = traj._file
        dcd.tell() == len(dcd)  #  instead of len(dcd) - 1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.seek(current_frame - 1)
self.seek(current_frame)

self.current_frame = current_frame
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
self.current_frame = current_frame



def tell(self):
Expand Down
3 changes: 2 additions & 1 deletion package/MDAnalysis/lib/formats/libmdaxdr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,8 @@ cdef class _XDRFile:

# where was I
current_frame = state[1]
self.seek(current_frame)
self.seek(current_frame - 1)
self.current_frame = current_frame
Comment on lines +309 to +310
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these lines make the low-level tests fail?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current tests can pass if these changes are ditched, but still the last frame cannot be easily pickled as seek here is sort of different from that in DCD files.


def seek(self, frame):
"""Seek to Frame.
Expand Down
2 changes: 1 addition & 1 deletion package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
from commands import getoutput

# NOTE: keep in sync with MDAnalysis.__version__ in version.py
RELEASE = "1.0.0"
RELEASE = "1.0.1-dev"

is_release = 'dev' not in RELEASE

Expand Down
3 changes: 2 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,12 +419,12 @@ def test_transformations_copy(self,ref,transformed):
ideal_coords = ref.iter_ts(i).positions + v1 + v2
assert_array_almost_equal(ts.positions, ideal_coords, decimal = ref.prec)


def test_add_another_transformations_raises_ValueError(self, transformed):
# After defining the transformations, the workflow cannot be changed
with pytest.raises(ValueError):
transformed.add_transformations(translate([2,2,2]))


class MultiframeReaderTest(BaseReaderTest):
def test_last_frame(self, ref, reader):
ts = reader[-1]
Expand Down Expand Up @@ -491,6 +491,7 @@ def test_iter_as_aux_lowf(self, ref, reader):
decimal=ref.prec)



class BaseWriterTest(object):
@staticmethod
@pytest.fixture()
Expand Down
39 changes: 31 additions & 8 deletions testsuite/MDAnalysisTests/formats/test_libdcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,25 +84,48 @@ def dcd():
with DCDFile(DCD) as dcd:
yield dcd

def _assert_compare_readers(old_reader, new_reader):
frame = old_reader.read() # same as next(old_reader)
new_frame = new_reader.read() # same as next(new_reader)

def test_pickle(dcd):
dcd.seek(len(dcd) - 1)
dump = pickle.dumps(dcd)
new_dcd = pickle.loads(dump)
assert old_reader.fname == new_reader.fname
assert old_reader.tell() == new_reader.tell()
assert_almost_equal(frame.xyz, new_frame.xyz)
assert_almost_equal(frame.unitcell, new_frame.unitcell)
Comment on lines +93 to +94
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added tests to check the contents of the current frame


assert dcd.fname == new_dcd.fname
assert dcd.tell() == new_dcd.tell()
def test_pickle(dcd):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

make the generic pickle test start somewhere in the middle of the trajectory

mid = len(dcd) // 2
dcd.seek(mid)
new_dcd = pickle.loads(pickle.dumps(dcd))
_assert_compare_readers(dcd, new_dcd)

def test_pickle_last(dcd):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

specific test for last frame (this was the original test_pickle test)

dcd.seek(len(dcd) - 1)
new_dcd = pickle.loads(pickle.dumps(dcd))
_assert_compare_readers(dcd, new_dcd)

def test_pickle_closed(dcd):
dcd.seek(len(dcd) - 1)
dcd.close()
dump = pickle.dumps(dcd)
new_dcd = pickle.loads(dump)
new_dcd = pickle.loads(pickle.dumps(dcd))

assert dcd.fname == new_dcd.fname
assert dcd.tell() != new_dcd.tell()

def test_pickle_after_read(dcd):
_ = dcd.read()
new_dcd = pickle.loads(pickle.dumps(dcd))
_assert_compare_readers(dcd, new_dcd)

#@pytest.mark.xfail
def test_pickle_immediately(dcd):
# do not seek before pickling: this seems to leave the DCDFile
# object in weird state: is this supposed to work?
new_dcd = pickle.loads(pickle.dumps(dcd))
Comment on lines +121 to +124
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pickle always fails if we have never done a seek() before attempting to pickle.

______________________________________ test_pickle_immediately _______________________________________

dcd = <MDAnalysis.lib.formats.libdcd.DCDFile object at 0x12a1e5890>

    def test_pickle_immediately(dcd):
        # do not seek before pickling: this seems to leave the DCDFile
        # object in weird state: is this supposed to work?
>       new_dcd = pickle.loads(pickle.dumps(dcd))

testsuite/MDAnalysisTests/formats/test_libdcd.py:124:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
MDAnalysis/lib/formats/libdcd.pyx:268: in MDAnalysis.lib.formats.libdcd.DCDFile.__setstate__
    ???
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

>   ???
E   OSError: DCD seek failed with DCD error=Normal EOF

MDAnalysis/lib/formats/libdcd.pyx:422: OSError

I find this behavior surprising (as a user) but maybe I don't understand if this is expected. But in that case we should raise an appropriate exception.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, thanks, then that's a bug.

Copy link
Member Author

@orbeckst orbeckst Aug 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Except that it is only a bug in this PR... because it works with 1.0.0 and current develop

In [1]: import MDAnalysis as mda
In [2]: mda.__version__
Out[2]: '1.0.0'
In [3]: import MDAnalysis as mda; from MDAnalysis.tests import datafiles as data

For dcd

In [13]: dcd = mda.lib.formats.libdcd.DCDFile(data.COORDINATES_DCD)
In [14]: pickle.dumps(dcd)
Out[14]: b'\x80\x03cMDAnalysis.lib.formats.libdcd\nDCDFile\nq\x00Xg\x00\x00\x00~/anaconda3/envs/mda3/lib/python3.6/site-packages/MDAnalysisTests/data/coordinates/test.dcdq\x01X\x01\x00\x00\x00rq\x02\x86q\x03Rq\x04K\x01K\x00\x86q\x05b.'
In [15]: pickle.loads(pickle.dumps(dcd))
Out[15]: <MDAnalysis.lib.formats.libdcd.DCDFile at 0x118d75678>

and also works for trr

In [8]: trr = mda.lib.formats.libmdaxdr.TRRFile(data.COORDINATES_TRR)
In [9]: pickle.dumps(trr)
Out[9]: b'\x80\x03cMDAnalysis.lib.formats.libmdaxdr\nTRRFile\nq\x00Xg\x00\x00\x00~/anaconda3/envs/mda3/lib/python3.6/site-packages/MDAnalysisTests/data/coordinates/test.trrq\x01X\x01\x00\x00\x00rq\x02\x86q\x03Rq\x04(K\x01K\x00NK\x00tq\x05b.'
In [10]: pickle.loads(pickle.dumps(trr))
Out[10]: <MDAnalysis.lib.formats.libmdaxdr.TRRFile at 0x118c0d438>

EDIT: added loading and fixed DCD example

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The approach I took here is flawed (so it also shouldn't work in current develop--when pickle.loading).

Copy link
Member Author

@orbeckst orbeckst Aug 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I need to check what commit I used for this:

In [13]: dcd = mda.lib.formats.libdcd.DCDFile(data.COORDINATES_DCD)

In [14]: pickle.dumps(dcd)
Out[14]: b'\x80\x03cMDAnalysis.lib.formats.libdcd\nDCDFile\nq\x00Xj\x00\x00\x00~/anaconda3/envs/mda3dev/lib/python3.7/site-packages/MDAnalysisTests/data/coordinates/test.dcdq\x01X\x01\x00\x00\x00rq\x02\x86q\x03Rq\x04K\x01K\x00\x86q\x05b.'

In [15]: mda.__version__
Out[15]: '2.0.0-dev0'

In [16]: pickle.loads(pickle.dumps(dcd))
Out[16]: <MDAnalysis.lib.formats.libdcd.DCDFile at 0x1268ffb90>

Copy link
Contributor

@yuxuanzhuang yuxuanzhuang Aug 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oddly enough, data.DCD (which is used in the testsuite) fails at pickling while data.COORDINATES_DCD fails at reading:

>>> dcd = mda.lib.formats.libdcd.DCDFile(data.COORDINATES_DCD)
>>> read_p = pickle.loads(pickle.dumps(dcd))
>>> read_p.read()
OSError: Reading DCD header failed: format of DCD file is wrong


assert dcd.fname == new_dcd.fname
assert dcd.tell() == new_dcd.tell()


@pytest.mark.parametrize("new_frame", (10, 42, 21))
def test_seek_normal(new_frame, dcd):
Expand Down
33 changes: 24 additions & 9 deletions testsuite/MDAnalysisTests/formats/test_libmdaxdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import numpy as np

from numpy.testing import (assert_almost_equal, assert_array_almost_equal,
assert_array_equal)
assert_array_equal, assert_equal)

from MDAnalysis.lib.formats.libmdaxdr import TRRFile, XTCFile

Expand Down Expand Up @@ -128,20 +128,35 @@ def test_read_write_mode_file(self, xdr, tmpdir, fname):
with pytest.raises(IOError):
f.read()

@staticmethod
def _assert_compare_readers(old_reader, new_reader):
frame = old_reader.read()
new_frame = new_reader.read()
Comment on lines +133 to +134
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

read() or next(reader) is the only way to get the current frame


assert old_reader.fname == new_reader.fname
assert old_reader.tell() == new_reader.tell()

assert_equal(old_reader.offsets, new_reader.offsets)
assert_almost_equal(frame.x, new_frame.x)
assert_almost_equal(frame.box, new_frame.box)
assert frame.step == new_frame.step
assert_almost_equal(frame.time, new_frame.time)
Comment on lines +139 to +143
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added tests for contents of the frame


def test_pickle(self, reader):
reader.seek(len(reader) - 1)
dump = pickle.dumps(reader)
new_reader = pickle.loads(dump)
mid = len(reader) // 2
reader.seek(mid)
new_reader = pickle.loads(pickle.dumps(reader))
self._assert_compare_readers(reader, new_reader)

assert reader.fname == new_reader.fname
assert reader.tell() == new_reader.tell()
assert_almost_equal(reader.offsets, new_reader.offsets)
def test_pickle_last_frame(self, reader):
reader.seek(len(reader) - 1)
new_reader = pickle.loads(pickle.dumps(reader))
self._assert_compare_readers(reader, new_reader)

def test_pickle_closed(self, reader):
reader.seek(len(reader) - 1)
reader.close()
dump = pickle.dumps(reader)
new_reader = pickle.loads(dump)
new_reader = pickle.loads(pickle.dumps(reader))

assert reader.fname == new_reader.fname
assert reader.tell() != new_reader.tell()
Expand Down
2 changes: 1 addition & 1 deletion testsuite/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def run(self):

if __name__ == '__main__':
# this must be in-sync with MDAnalysis
RELEASE = "1.0.0"
RELEASE = "1.0.1-dev"

with open("README") as summary:
LONG_DESCRIPTION = summary.read()
Expand Down