diff --git a/package/CHANGELOG b/package/CHANGELOG index d1a648d35cf..5e176027119 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -13,7 +13,7 @@ The rules for this file: * release numbers follow "Semantic Versioning" http://semver.org ------------------------------------------------------------------------------ -??/??/?? IAlibay, ianmkenney, PicoCentauri +??/??/?? IAlibay, ianmkenney, PicoCentauri, pgbarletta * 2.7.0 @@ -24,6 +24,7 @@ Fixes Enhancements * Added a warning about charge neutrality to the documentation of `DielectricConstant` (Issue #4262, PR #4263) + * Add support for reading chainID info from prmtop amber topologies (PR #4007) Changes * The `mda-xdrlib` module is now a core dependency of MDAnalysis diff --git a/package/MDAnalysis/topology/TOPParser.py b/package/MDAnalysis/topology/TOPParser.py index 7e45727309b..a94a39a950e 100644 --- a/package/MDAnalysis/topology/TOPParser.py +++ b/package/MDAnalysis/topology/TOPParser.py @@ -78,7 +78,7 @@ :mod:`MDAnalysis.topology.guessers` for more details. .. _`PARM parameter/topology file specification`: - http://ambermd.org/formats.html#topology + https://ambermd.org/FileFormats.php#topo.cntrl Classes ------- @@ -93,7 +93,7 @@ from .tables import Z2SYMB from ..lib.util import openany, FORTRANReader -from .base import TopologyReaderBase +from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, @@ -106,6 +106,7 @@ Resids, Resnums, Segids, + ChainIDs, AtomAttr, Bonds, Angles, @@ -140,13 +141,20 @@ class TOPParser(TopologyReaderBase): - Bonds - Angles - Dihedrals (inc. impropers) + - ChainIDs (from %RESIDUE_CHAINID) + - Segids (from %RESIDUE_CHAINID) The format is defined in `PARM parameter/topology file specification`_. The reader tries to detect if it is a newer (AMBER 12?) file format by looking for the flag "ATOMIC_NUMBER". .. 
_`PARM parameter/topology file specification`: - http://ambermd.org/formats.html#topology + https://ambermd.org/FileFormats.php#topo.cntrl + + Additionally, the RESIDUE_CHAINID non-standard flag is supported. This + can be added with the `addPDB`_ command from parmed: + + .. _`addPDB`: https://parmed.github.io/ParmEd/html/parmed.html#addpdb Notes ----- @@ -162,6 +170,8 @@ class TOPParser(TopologyReaderBase): warns users that chamber-style topologies are not currently supported .. versionchanged:: 2.0.0 no longer guesses elements if missing + .. versionchanged:: 2.7.0 + gets Segments and chainIDs from flag RESIDUE_CHAINID, when present """ format = ['TOP', 'PRMTOP', 'PARM7'] @@ -188,7 +198,8 @@ def parse(self, **kwargs): "ANGLES_INC_HYDROGEN": (4, 10, self.parse_bonded, "angh", 4), "ANGLES_WITHOUT_HYDROGEN": (4, 10, self.parse_bonded, "anga", 5), "DIHEDRALS_INC_HYDROGEN": (5, 10, self.parse_bonded, "dihh", 6), - "DIHEDRALS_WITHOUT_HYDROGEN": (5, 10, self.parse_bonded, "diha", 7) + "DIHEDRALS_WITHOUT_HYDROGEN": (5, 10, self.parse_bonded, "diha", 7), + "RESIDUE_CHAINID": (1, 20, self.parse_chainids, "segids", 11), } attrs = {} # empty dict for attrs that we'll fill @@ -298,12 +309,36 @@ def next_getter(): attrs['atomids'] = Atomids(np.arange(n_atoms) + 1) attrs['resids'] = Resids(np.arange(n_res) + 1) attrs['resnums'] = Resnums(np.arange(n_res) + 1) - attrs['segids'] = Segids(np.array(['SYSTEM'], dtype=object)) - top = Topology(n_atoms, n_res, 1, - attrs=list(attrs.values()), - atom_resindex=residx, - residue_segindex=None) + # Amber's 'RESIDUE_CHAINID' is a by-residue attribute, turn it into + # a by-atom attribute when present. See PR #4007. 
+ if "segids" in attrs and len(attrs["segids"]) == n_res: + segidx, (segids,) = change_squash((attrs["segids"],), (attrs["segids"],)) + chainids = [attrs["segids"][r] for r in residx] + + attrs["segids"] = Segids(segids) + attrs["ChainIDs"] = ChainIDs(chainids) + n_segs = len(segids) + else: + if "segids" in attrs: + msg = ( + f"Number of residues ({n_res}) does not match number of " + f"%RESIDUE_CHAINID ({len(attrs['segids'])}). Skipping section." + ) + logger.warning(msg) + warnings.warn(msg) + attrs["segids"] = Segids(np.array(["SYSTEM"], dtype=object)) + segidx = None + n_segs = 1 + + top = Topology( + n_atoms, + n_res, + n_segs, + attrs=list(attrs.values()), + atom_resindex=residx, + residue_segindex=segidx, + ) return top @@ -537,10 +572,12 @@ def parse_bonded(self, num_per_record, numlines): Note ---- For the bond/angle sections of parm7 files, the atom numbers are set to - coordinate array index values. As detailed in - http://ambermd.org/formats.html to recover the actual atom number, one + coordinate array index values. As detailed in `the specification`_, + to recover the actual atom number, one should divide the values by 3 and add 1. Here, since we want to satisfy zero-indexing, we only divide by 3. + + .. _`the specification`: https://ambermd.org/FileFormats.php#topo.cntrl """ fields = self.parsesection_mapper(numlines, lambda x: int(x) // 3) section = self.parse_chunks(fields, num_per_record) @@ -563,9 +600,24 @@ def parsesection_mapper(self, numlines, mapper): A list of all entries in a given parm7 section """ section = [] - y = next(self.topfile).strip("%FORMAT(") - y.strip(")") - x = FORTRANReader(y) + + def get_fmt(file): + """ Skips '%COMMENT' lines until it gets the FORMAT specification + for the section.""" + line = next(file) + if line[:7] == "%FORMAT": + return line[8:].split(")")[0] + elif line[:8] == "%COMMENT": + return get_fmt(file) + else: + raise ValueError( + "Invalid header line. 
Does not begin with either %FLAG, %FORMAT " + f"nor %COMMENT:\n{line}" + ) + + # There may be %COMMENT lines between %FLAG and %FORMAT statements. Skip them. + fmt = get_fmt(self.topfile) + x = FORTRANReader(fmt) for i in range(numlines): l = next(self.topfile) for j in range(len(x.entries)): @@ -596,7 +648,7 @@ def parse_dihedrals(self, diha, dihh): Note ---- - As detailed in http://ambermd.org/formats.html, the dihedral sections + As detailed in `the specification`_, the dihedral sections of parm7 files contain information about both conventional dihedrals and impropers. The following must be accounted for: 1) If the fourth atom in a dihedral entry is given a negative value, @@ -605,6 +657,8 @@ def parse_dihedrals(self, diha, dihh): this indicates that it 1-4 NB interactions are ignored for this dihedrals. This could be due to the dihedral within a ring, or if it is part of a multi-term dihedral definition or if it is an improper. + + .. _`the specification`: https://ambermd.org/FileFormats.php#topo.cntrl """ improp = [] dihed = [] @@ -620,3 +674,23 @@ def parse_dihedrals(self, diha, dihh): dihedrals = Dihedrals(dihed) impropers = Impropers(improp) return dihedrals, impropers + + def parse_chainids(self, num_per_record: int, numlines: int): + """Extracts the chainID of each residue + + Parameters + ---------- + num_per_record : int + The number of entries for each record in section (unused input) + numlines : int + The number of lines to be parsed in current section + + Returns + ------- + attr : numpy array + A numpy array containing the chainID of each residue as defined in + the parm7 file + """ + vals = self.parsesection_mapper(numlines, lambda x: x) + attr = np.array(vals) + return attr diff --git a/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.error4.parm7 b/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.error4.parm7 new file mode 100644 index 00000000000..3de8b764886 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/Amber/ace_mbondi3.error4.parm7 @@ 
-0,0 +1,144 @@ +%VERSION VERSION_STAMP = V0001.000 DATE = 09/08/18 15:36:17 +%FLAG TITLE +%FORMAT(20a4) +ACE +%FLAG POINTERS +%FORMAT(10I8) + 6 4 3 2 6 1 9 0 0 0 + 16 1 2 1 0 3 3 3 4 0 + 0 0 0 0 0 0 0 0 6 0 + 0 +%FLAG ATOM_NAME +%FORMAT(20a4) +HH31CH3 HH32HH33C O +%FLAG CHARGE +%COMMENT +%BAD LINE +%COMMENT +%FORMAT(5E16.8) + 2.04636429E+00 -6.67300626E+00 2.04636429E+00 2.04636429E+00 1.08823576E+01 + -1.03484442E+01 +%FLAG ATOMIC_NUMBER +%FORMAT(10I8) + 1 6 1 1 6 8 +%FLAG MASS +%FORMAT(5E16.8) + 1.00800000E+00 1.20100000E+01 1.00800000E+00 1.00800000E+00 1.20100000E+01 + 1.60000000E+01 +%FLAG ATOM_TYPE_INDEX +%FORMAT(10I8) + 1 2 1 1 3 4 +%FLAG NUMBER_EXCLUDED_ATOMS +%FORMAT(10I8) + 5 4 3 2 1 1 +%FLAG NONBONDED_PARM_INDEX +%FORMAT(10I8) + 1 2 4 7 2 3 5 8 4 5 + 6 9 7 8 9 10 +%FLAG RESIDUE_LABEL +%FORMAT(20a4) +ACE +%FLAG RESIDUE_POINTER +%FORMAT(10I8) + 1 +%FLAG BOND_FORCE_CONSTANT +%FORMAT(5E16.8) + 5.70000000E+02 3.40000000E+02 3.17000000E+02 +%FLAG BOND_EQUIL_VALUE +%FORMAT(5E16.8) + 1.22900000E+00 1.09000000E+00 1.52200000E+00 +%FLAG ANGLE_FORCE_CONSTANT +%FORMAT(5E16.8) + 5.00000000E+01 3.50000000E+01 8.00000000E+01 +%FLAG ANGLE_EQUIL_VALUE +%FORMAT(5E16.8) + 1.91113635E+00 1.91113635E+00 2.10137732E+00 +%FLAG DIHEDRAL_FORCE_CONSTANT +%FORMAT(5E16.8) + 8.00000000E-01 0.00000000E+00 8.00000000E-02 +%FLAG DIHEDRAL_PERIODICITY +%FORMAT(5E16.8) + 1.00000000E+00 2.00000000E+00 3.00000000E+00 +%FLAG DIHEDRAL_PHASE +%FORMAT(5E16.8) + 0.00000000E+00 0.00000000E+00 3.14159400E+00 +%FLAG SCEE_SCALE_FACTOR +%FORMAT(5E16.8) + 1.20000000E+00 1.20000000E+00 1.20000000E+00 +%FLAG SCNB_SCALE_FACTOR +%FORMAT(5E16.8) + 2.00000000E+00 2.00000000E+00 2.00000000E+00 +%FLAG SOLTY +%FORMAT(5E16.8) + 0.00000000E+00 0.00000000E+00 0.00000000E+00 0.00000000E+00 +%FLAG LENNARD_JONES_ACOEF +%FORMAT(5E16.8) + 7.51607703E+03 9.71708117E+04 1.04308023E+06 8.61541883E+04 9.24822270E+05 + 8.19971662E+05 5.44261042E+04 6.47841731E+05 5.74393458E+05 3.79876399E+05 +%FLAG LENNARD_JONES_BCOEF 
+%FORMAT(5E16.8) + 2.17257828E+01 1.26919150E+02 6.75612247E+02 1.12529845E+02 5.99015525E+02 + 5.31102864E+02 1.11805549E+02 6.26720080E+02 5.55666448E+02 5.64885984E+02 +%FLAG BONDS_INC_HYDROGEN +%FORMAT(10I8) + 3 6 2 3 9 2 0 3 2 +%FLAG BONDS_WITHOUT_HYDROGEN +%FORMAT(10I8) + 12 15 1 3 12 3 +%FLAG ANGLES_INC_HYDROGEN +%FORMAT(10I8) + 9 3 12 1 6 3 9 2 6 3 + 12 1 0 3 6 2 0 3 9 2 + 0 3 12 1 +%FLAG ANGLES_WITHOUT_HYDROGEN +%FORMAT(10I8) + 3 12 15 3 +%FLAG DIHEDRALS_INC_HYDROGEN +%FORMAT(10I8) + 9 3 12 15 1 9 3 -12 15 2 + 9 3 -12 15 3 6 3 12 15 1 + 6 3 -12 15 2 6 3 -12 15 3 + 0 3 12 15 1 0 3 -12 15 2 + 0 3 -12 15 3 +%FLAG DIHEDRALS_WITHOUT_HYDROGEN +%FORMAT(10I8) + +%FLAG EXCLUDED_ATOMS_LIST +%FORMAT(10I8) + 2 3 4 5 6 3 4 5 6 4 + 5 6 5 6 6 0 +%FLAG HBOND_ACOEF +%FORMAT(5E16.8) + +%FLAG HBOND_BCOEF +%FORMAT(5E16.8) + +%FLAG HBCUT +%FORMAT(5E16.8) + +%FLAG AMBER_ATOM_TYPE +%FORMAT(20a4) +HC CT HC HC C O +%FLAG TREE_CHAIN_CLASSIFICATION +%FORMAT(20a4) +M M E E M E +%FLAG JOIN_ARRAY +%FORMAT(10I8) + 0 0 0 0 0 0 +%FLAG IROTAT +%FORMAT(10I8) + 0 0 0 0 0 0 +%FLAG RADIUS_SET +%FORMAT(1a80) +ArgH and AspGluO modified Bondi2 radii (mbondi3) +%FLAG RADII +%FORMAT(5E16.8) + 1.20000000E+00 1.70000000E+00 1.20000000E+00 1.20000000E+00 1.70000000E+00 + 1.50000000E+00 +%FLAG SCREEN +%FORMAT(5E16.8) + 8.50000000E-01 7.20000000E-01 8.50000000E-01 8.50000000E-01 7.20000000E-01 + 8.50000000E-01 +%FLAG IPOL +%FORMAT(1I8) + 0 diff --git a/testsuite/MDAnalysisTests/data/Amber/ache_chainid.error5.prmtop.bz2 b/testsuite/MDAnalysisTests/data/Amber/ache_chainid.error5.prmtop.bz2 new file mode 100644 index 00000000000..90766f998e1 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/Amber/ache_chainid.error5.prmtop.bz2 differ diff --git a/testsuite/MDAnalysisTests/data/Amber/ache_chainid.prmtop.bz2 b/testsuite/MDAnalysisTests/data/Amber/ache_chainid.prmtop.bz2 new file mode 100644 index 00000000000..b19c7c016fc Binary files /dev/null and 
b/testsuite/MDAnalysisTests/data/Amber/ache_chainid.prmtop.bz2 differ diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index caedb9a540c..68206ad770d 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -96,7 +96,10 @@ "XYZ", "XYZ_psf", "XYZ_bz2", "XYZ_mini", "XYZ_five", # 3 and 5 atoms xyzs for an easy topology "TXYZ", "ARC", "ARC_PBC", # Tinker files - "PRM", "TRJ", "TRJ_bz2", # Amber (no periodic box) + "PRM", + "PRM_chainid_bz2", + "TRJ", + "TRJ_bz2", # Amber (no periodic box) "INPCRD", "PRMpbc", "TRJpbc_bz2", # Amber (periodic box) "PRM7", "NCDFtruncoct", # Amber (cpptrj test trajectory, see Issue 488) @@ -108,7 +111,11 @@ "PRMNCRST", # Amber ncrst with positions/forces/velocities "PRM_NCBOX", "TRJ_NCBOX", # Amber parm7 + nc w/ pos/forces/vels/box "PRMNEGATIVE", # Amber negative ATOMIC_NUMBER (Issue 2306) - "PRMErr1", "PRMErr2", "PRMErr3", # Amber TOP files to check raised errors + "PRMErr1", # Amber TOP files to check raised errors + "PRMErr2", + "PRMErr3", + "PRMErr4", + "PRMErr5", "PRM_UreyBradley", # prmtop from ParmEd test files with Urey-Bradley angles "PRM7_ala2", "RST7_ala2", # prmtop and rst files from ParmEd example files "PRM19SBOPC", # prmtop w/ ff19SB CMAP terms and OPC water (Issue #2449) @@ -425,6 +432,7 @@ TRJ_bz2 = (_data_ref / 'Amber/ache.mdcrd.bz2').as_posix() PFncdf_Top = (_data_ref / 'Amber/posfor.top').as_posix() PFncdf_Trj = (_data_ref / 'Amber/posfor.ncdf').as_posix() +PRM_chainid_bz2 = (_data_ref / "Amber/ache_chainid.prmtop.bz2").as_posix() CPPTRAJ_TRAJ_TOP = (_data_ref / 'Amber/cpptraj_traj.prmtop').as_posix() CPPTRAJ_TRAJ = (_data_ref / 'Amber/cpptraj_traj.nc').as_posix() @@ -451,9 +459,11 @@ PRMNEGATIVE = (_data_ref / 'Amber/ace_mbondi3.negative.parm7').as_posix() -PRMErr1 = (_data_ref / 'Amber/ace_mbondi3.error1.parm7').as_posix() -PRMErr2 = (_data_ref / 'Amber/ace_mbondi3.error2.parm7').as_posix() -PRMErr3 = (_data_ref / 
'Amber/ace_mbondi3.error3.parm7').as_posix() +PRMErr1 = (_data_ref / "Amber/ace_mbondi3.error1.parm7").as_posix() +PRMErr2 = (_data_ref / "Amber/ace_mbondi3.error2.parm7").as_posix() +PRMErr3 = (_data_ref / "Amber/ace_mbondi3.error3.parm7").as_posix() +PRMErr4 = (_data_ref / "Amber/ace_mbondi3.error4.parm7").as_posix() +PRMErr5 = (_data_ref / "Amber/ache_chainid.error5.prmtop.bz2").as_posix() PRM_UreyBradley = (_data_ref / 'Amber/parmed_fad.prmtop').as_posix() PRM7_ala2 = (_data_ref / 'Amber/parmed_ala2_solv.parm7').as_posix() diff --git a/testsuite/MDAnalysisTests/topology/test_top.py b/testsuite/MDAnalysisTests/topology/test_top.py index 46ace7514f5..853b4c12e0c 100644 --- a/testsuite/MDAnalysisTests/topology/test_top.py +++ b/testsuite/MDAnalysisTests/topology/test_top.py @@ -30,6 +30,7 @@ from MDAnalysisTests.topology.base import ParserBase from MDAnalysisTests.datafiles import ( PRM, # ache.prmtop + PRM_chainid_bz2, # ache_chainid.prmtop.bz2 PRM12, # anti.top PRM7, # tz2.truncoct.parm7.bz2 PRMpbc, @@ -38,6 +39,8 @@ PRMErr1, PRMErr2, PRMErr3, + PRMErr4, + PRMErr5, PRM_UreyBradley, PRM19SBOPC, ) @@ -201,6 +204,196 @@ class TestPRMParser(TOPBase): expected_elems = None +class TestPRMChainidParser(TOPBase): + ref_filename = PRM_chainid_bz2 + # Checks the reading of %FLAG RESIDUE_CHAINID. 
See PR #4007 + expected_attrs = [ + "names", + "types", + "type_indices", + "charges", + "masses", + "resnames", + "bonds", + "angles", + "dihedrals", + "impropers", + "elements", + "chainIDs", + ] + expected_n_atoms = 677 + expected_n_residues = 38 + expected_n_segments = 3 + expected_n_bonds = 695 + expected_n_angles = 1220 + expected_n_dihedrals = 1797 + expected_n_impropers = 189 + atom_i = 79 + expected_n_zero_bonds = 4 + expected_n_i_bonds = 3 + expected_n_zero_angles = 9 + expected_n_i_angles = 9 + expected_n_zero_dihedrals = 14 + expected_n_i_dihedrals = 15 + expected_n_zero_impropers = 0 + expected_n_i_impropers = 4 + atom_zero_bond_values = ((0, 4), (0, 1), (0, 2), (0, 3)) + atom_i_bond_values = ((79, 80), (79, 83), (77, 79)) + atom_zero_angle_values = ( + (0, 4, 6), + (0, 4, 10), + (3, 0, 4), + (2, 0, 3), + (2, 0, 4), + (1, 0, 2), + (1, 0, 3), + (1, 0, 4), + (0, 4, 5), + ) + atom_i_angle_values = ( + (80, 79, 83), + (77, 79, 80), + (77, 79, 83), + (74, 77, 79), + (79, 80, 81), + (79, 80, 82), + (79, 83, 84), + (79, 83, 85), + (78, 77, 79), + ) + atom_zero_dihedral_values = ( + (0, 4, 10, 11), + (0, 4, 10, 12), + (3, 0, 4, 5), + (3, 0, 4, 6), + (3, 0, 4, 10), + (2, 0, 4, 5), + (2, 0, 4, 6), + (2, 0, 4, 10), + (1, 0, 4, 5), + (1, 0, 4, 6), + (1, 0, 4, 10), + (0, 4, 6, 7), + (0, 4, 6, 8), + (0, 4, 6, 9), + ) + atom_i_dihedral_values = ( + (71, 74, 77, 79), + (74, 77, 79, 80), + (74, 77, 79, 83), + (75, 74, 77, 79), + (76, 74, 77, 79), + (77, 79, 80, 81), + (77, 79, 80, 82), + (77, 79, 83, 84), + (77, 79, 83, 85), + (78, 77, 79, 80), + (78, 77, 79, 83), + (80, 79, 83, 84), + (80, 79, 83, 85), + (81, 80, 79, 83), + (82, 80, 79, 83), + ) + atom_zero_improper_values = () + atom_i_improper_values = ( + (74, 79, 77, 78), + (77, 80, 79, 83), + (79, 81, 80, 82), + (79, 84, 83, 85), + ) + elems_ranges = [[0, 9], [250, 257], [500, 508]] + + expected_elems = [ + np.array( + [ + "N", + "H", + "H", + "H", + "C", + "H", + "C", + "H", + "H", + ], + dtype=object, + ), + 
np.array( + [ + "O", + "O", + "N", + "H", + "H", + "H", + "C", + ], + dtype=object, + ), + np.array(["H", "C", "O", "O", "N", "H", "H", "H"], dtype=object), + ] + + expected_chainIDs = np.array( + [ + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "A", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "B", + "C", + "C", + "C", + "C", + "C", + "C", + "C", + "C", + "C", + "C", + ] + ) + + def test_chainIDs(self, filename): + """Tests chainIDs attribute. + + If RESIDUE_CHAINID present, residue chainIDs are compared against a + provided list of expected values. + Otherwise, checks that chainIDs are not in the topology attributes. + """ + + u = mda.Universe(filename) + if hasattr(self, "expected_chainIDs"): + reschainIDs = [atomchainIDs[0] for atomchainIDs in u.residues.chainIDs] + assert_equal( + reschainIDs, self.expected_chainIDs, "unexpected element match" + ) + else: + assert not hasattr(u.atoms, "chainIDs"), "Unexpected chainIDs attr" + + class TestPRM12Parser(TOPBase): ref_filename = PRM12 expected_n_atoms = 8923 @@ -465,17 +658,28 @@ class TestPRMEP(TOPBase): class TestErrorsAndWarnings(object): - ATOMIC_NUMBER_MSG = ("ATOMIC_NUMBER record not found, elements attribute " - "will not be populated") - MISSING_ELEM_MSG = ("Unknown ATOMIC_NUMBER value found for some atoms, " - "these have been given an empty element record") - COORDINATE_READER_MSG = ("No coordinate reader found") - - @pytest.mark.parametrize("parm,errmatch", ( - [PRMErr1, "%VE Missing in header"], - [PRMErr2, "'TITLE' missing in header"], - [PRM_UreyBradley, "Chamber-style TOP file"] - )) + ATOMIC_NUMBER_MSG = ( + "ATOMIC_NUMBER record not found, elements attribute will not be populated" + ) + MISSING_ELEM_MSG = ( + "Unknown ATOMIC_NUMBER value found for some atoms, " + "these have been given an empty element record" + ) + COORDINATE_READER_MSG = "No coordinate reader found" + RESIDUE_CHAINID_MSG = ( + "Number of 
residues (38) does not match number of " + "%RESIDUE_CHAINID (37). Skipping section." + ) + + @pytest.mark.parametrize( + "parm,errmatch", + ( + [PRMErr1, "%VE Missing in header"], + [PRMErr2, "'TITLE' missing in header"], + [PRMErr4, "Invalid header line."], + [PRM_UreyBradley, "Chamber-style TOP file"], + ), + ) def test_value_errors(self, parm, errmatch): with pytest.raises(ValueError, match=errmatch): u = mda.Universe(parm) @@ -484,13 +688,17 @@ def test_flag_index_error(self): with pytest.raises(IndexError, match="%FLAG section not found"): u = mda.Universe(PRMErr3) - @pytest.mark.parametrize("parm, errmsgs", ( - [PRM, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], - [PRM7, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], - [PRMpbc, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], - [PRMNEGATIVE, [MISSING_ELEM_MSG, COORDINATE_READER_MSG]], - [PRM19SBOPC, [MISSING_ELEM_MSG, COORDINATE_READER_MSG]] - )) + @pytest.mark.parametrize( + "parm, errmsgs", + ( + [PRM, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], + [PRM7, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], + [PRMpbc, [ATOMIC_NUMBER_MSG, COORDINATE_READER_MSG]], + [PRMNEGATIVE, [MISSING_ELEM_MSG, COORDINATE_READER_MSG]], + [PRM19SBOPC, [MISSING_ELEM_MSG, COORDINATE_READER_MSG]], + [PRMErr5, [RESIDUE_CHAINID_MSG, COORDINATE_READER_MSG]], + ), + ) def test_warning(self, parm, errmsgs): with pytest.warns(UserWarning) as record: u = mda.Universe(parm)