From 135529717a735b17d2b2545f34effd66ef4f3b13 Mon Sep 17 00:00:00 2001 From: richard Date: Wed, 4 Jan 2023 18:29:50 +0000 Subject: [PATCH] taking a swing at #3825 --- package/CHANGELOG | 4 ++- package/MDAnalysis/coordinates/PDB.py | 10 +++++++ .../MDAnalysisTests/coordinates/test_pdb.py | 27 ++++++++++++++++++- .../MDAnalysisTests/data/varying_occ_tmp.pdb | 10 +++++++ testsuite/MDAnalysisTests/datafiles.py | 2 ++ 5 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 testsuite/MDAnalysisTests/data/varying_occ_tmp.pdb diff --git a/package/CHANGELOG b/package/CHANGELOG index 0a34834cf4e..9cc820c6698 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,6 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ - ??/??/?? IAlibay, pgbarletta, mglagolev, hmacdope, manuel.nuno.melo, chrispfae, ooprathamm, MeetB7, BFedder * 2.5.0 @@ -31,6 +30,9 @@ Enhancements * AuxReaders are now pickle-able and copy-able (Issue #1785, PR #3887) * Add pickling support for Atom, Residue, Segment, ResidueGroup and SegmentGroup. (PR #3953) +* PDBReader now populates ts.data['tempfactor'] with the tempfactor for + each atom *for each frame*. Note, this does not affect the topology (i.e. + `AtomGroup.tempfactors` is not dynamically updated. (Issue #3825) Changes * As per NEP29 the minimum supported NumPy version has been raised to 1.21 diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 48b9fad7409..faa37b2698e 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -242,6 +242,9 @@ class PDBReader(base.ReaderBase): .. versionchanged:: 1.0.0 Raise user warning for CRYST1_ record with unitary valuse (cubic box with sides of 1 Å) and do not set cell dimensions. + .. versionchanged:: 2.5.0 + Tempfactors (aka bfactors) are now read into the ts.data dictionary each + frame. Occupancies are also read into this dictionary. """ format = ['PDB', 'ENT'] units = {'time': None, 'length': 'Angstrom'} @@ -397,6 +400,7 @@ def _read_frame(self, frame): pos = 0 occupancy = np.ones(self.n_atoms) + tempfactor = np.zeros(self.n_atoms) # Seek to start and read until start of next frame self._pdbfile.seek(start) @@ -414,6 +418,10 @@ def _read_frame(self, frame): except ValueError: # Be tolerant for ill-formated or empty occupancies pass + try: + tempfactor[pos] = line[60:66] + except ValueError: + pass pos += 1 elif line[:6] == 'CRYST1': # does an implicit str -> float conversion @@ -455,6 +463,8 @@ def _read_frame(self, frame): self.convert_pos_from_native(self.ts.dimensions[:3]) self.ts.frame = frame self.ts.data['occupancy'] = occupancy + self.ts.data['tempfactor'] = tempfactor + return self.ts def close(self): diff --git a/testsuite/MDAnalysisTests/coordinates/test_pdb.py b/testsuite/MDAnalysisTests/coordinates/test_pdb.py index 3ee73bc8fa6..8f5bbdaec62 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_pdb.py +++ b/testsuite/MDAnalysisTests/coordinates/test_pdb.py @@ -31,7 +31,7 @@ from MDAnalysisTests.coordinates.reference import (RefAdKSmall, RefAdK) from MDAnalysisTests.datafiles import (PDB, PDB_small, PDB_multiframe, - PDB_full, + PDB_full, PDB_varying, XPDB_small, PSF, DCD, CONECT, CRD, INC_PDB, PDB_xlserial, ALIGN, ENT, PDB_cm, PDB_cm_gz, PDB_cm_bz2, @@ -965,6 +965,31 @@ def test_first_residue(self, universe): assert len(universe.residues[0].atoms) == 19 +class TestPDBVaryingOccTmp: + @staticmethod + @pytest.fixture(scope='class') + def u(): + return mda.Universe(PDB_varying) + + def test_ts_data_keys(self, u): + ts = u.trajectory.ts + + assert 'occupancy' in ts.data + assert 'tempfactor' in ts.data + + def test_varying_occupancy(self, u): + u.trajectory[0] + assert u.trajectory.ts.data['occupancy'][0] == pytest.approx(1.0) + u.trajectory[1] + assert u.trajectory.ts.data['occupancy'][0] == pytest.approx(0.9) + + def test_varying_tempfactor(self, u): + u.trajectory[0] + assert u.trajectory.ts.data['tempfactor'][0] == pytest.approx(23.44) + u.trajectory[1] + assert u.trajectory.ts.data['tempfactor'][0] == pytest.approx(24.44) + + class TestIncompletePDB(object): """Tests for Issue #396 diff --git a/testsuite/MDAnalysisTests/data/varying_occ_tmp.pdb b/testsuite/MDAnalysisTests/data/varying_occ_tmp.pdb new file mode 100644 index 00000000000..09c38b999f1 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/varying_occ_tmp.pdb @@ -0,0 +1,10 @@ +HEADER This is maybe what a protein looks like from space +REMARK Contains varying occupancies and tempfactors for a single atom +MODEL 1 +CRYST1 216.489 216.489 216.489 90.00 90.00 90.00 P 1 1 +ATOM 1 N PRO A 1 0.401 40.138 17.790 1.00 23.44 N +ENDMDL +MODEL 2 +ATOM 1 N PRO A 1 1.401 41.138 18.790 0.90 24.44 N +ENDMDL +END \ No newline at end of file diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index c44471d8459..f659389b2a4 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -55,6 +55,7 @@ "PDB_conect2TER", # Conect record to a TER entry (Issue 936) "PDB_singleconect", # Conect record with one entry (Issue 937) "PDB_icodes", # stripped down version of 1osm, has icodes! + "PDB_varying", # varying occupancies and tempfactors "XPDB_small", "PDB_full", # PDB 4E43 (full HEADER, TITLE, COMPND, REMARK, altloc) "ALIGN", # Various way to align atom names in PDB files @@ -317,6 +318,7 @@ PDB_conect2TER = resource_filename(__name__, 'data/CONECT2TER.pdb') PDB_singleconect = resource_filename(__name__, 'data/SINGLECONECT.pdb') PDB_icodes = resource_filename(__name__, 'data/1osm.pdb.gz') +PDB_varying = resource_filename(__name__, 'data/varying_occ_tmp.pdb') PDB_CRYOEM_BOX = resource_filename(__name__, 'data/5a7u.pdb') PDB_CHECK_RIGHTHAND_PA = resource_filename(__name__, 'data/6msm.pdb.bz2') FHIAIMS = resource_filename(__name__, 'data/fhiaims.in')