Skip to content

Commit

Permalink
Several feature improvements and bugfixes (#130)
Browse files Browse the repository at this point in the history
* Update citations for MMM rotlibs

* Update __str__ and add __repr__ method to RotamerEnsemble

* Fix reference formatting for MMM rotlibs

* Add number of heavy atoms to rotlib_info

* Started adding some math utility functions and tests

* Added has_cycle and cycle_idxs to Topology objects. Add tests

* Add functionality to guess dihedrals when constructing a RotEns from a trajectory

* Fix test broken by setting default burn_in=0 for from_trajectory method

* Change RotEns default eval_clash to True

* Fix bug causing backbone oxygen to mislocate when creating from_trajectory

* Add intra_fit method to RotEns

* Use weights as fallback in save_io when KDE fails

* Add methods to detect rings in molecular topologies and tests

* Fix bug iterating over chain_operators and fix behavior to set all frame chain_operators when coming from a list

* Fix from_trajectory behavior to retain hydrogens by default and not run protein setup. Fix to_rotlib behavior to align rotlib backbone before saving

* update tests and test data

* Update tests to run on Python3.11 and bump dev version
  • Loading branch information
mtessmer committed Apr 29, 2024
1 parent 033b4a0 commit 8c265db
Show file tree
Hide file tree
Showing 23 changed files with 3,177 additions and 2,933 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/PR_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.9]
python-version: [3.11]

steps:
- uses: actions/checkout@v3
Expand Down
22 changes: 19 additions & 3 deletions src/chilife/MolSysIC.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,12 +314,17 @@ def chain_operators(self, op: Dict):
' 3) a list with one dict to set all all frames to the same chain operator.'

if op is None:
if hasattr(self, '_chain_operators'):
from_list = True if isinstance(self._chain_operators, list) else False
else:
from_list=False

logging.info("No protein chain origins have been provided. All chains will start at [0, 0, 0]")

op = {idx: {"ori": np.array([0, 0, 0]), "mx": np.identity(3)}for idx in self._chain_operator_idxs}
self.has_chain_operators = False
self._chain_operators = op
self.apply_chain_operators()
self.apply_chain_operators(from_list=from_list)
else:
self.has_chain_operators = True
if isinstance(op, dict):
Expand Down Expand Up @@ -749,7 +754,7 @@ def load_new(self, z_matrix, **kwargs):
else:
self.apply_chain_operators()

def apply_chain_operators(self, idx=None):
def apply_chain_operators(self, idx=None, from_list=False):
"""
Apply chain operators to the specified frames (``idx``) of the MolSysIC trajectory. If no ``idx`` is provided
then all chain operators will be applied to all frames.
Expand All @@ -764,13 +769,23 @@ def apply_chain_operators(self, idx=None):

cart_coords = self.protein.trajectory.coordinate_array
if isinstance(self._chain_operators, list):
for i, op in zip(idx, self._chain_operators[idx]):
for i, op in zip(idx, self._chain_operators):
for start, stop in self._chain_segs:
current_mx, current_ori = chilife.ic_mx(*cart_coords[i, start:start+3])
mx = self.chain_operators[start]['mx']
ori = self.chain_operators[start]['ori']
m2m3 = current_mx @ mx
cart_coords[i, start:stop] = (cart_coords[i, start:stop] - current_ori) @ m2m3 + ori

elif from_list:
for i in range(len(cart_coords)):
for start, stop in self._chain_segs:
current_mx, current_ori = chilife.ic_mx(*cart_coords[i, start:start+3])
mx = self.chain_operators[start]['mx']
ori = self.chain_operators[start]['ori']
m2m3 = current_mx @ mx
cart_coords[i, start:stop] = (cart_coords[i, start:stop] - current_ori) @ m2m3 + ori

elif isinstance(self._chain_operators, dict):
for start, end in self._chain_segs:
current_mx, current_ori = chilife.ic_mx(*cart_coords[0, start:start + 3])
Expand All @@ -779,6 +794,7 @@ def apply_chain_operators(self, idx=None):
m2m3 = current_mx.T @ mx
cart_coords[:, start:end] = np.einsum('ijk,kl->ijl', cart_coords[:, start:end] - current_ori, m2m3) + ori


def use_frames(self, idxs):
"""
Remove all frames except those specified by `idxs`
Expand Down
95 changes: 79 additions & 16 deletions src/chilife/RotamerEnsemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from itertools import combinations
from scipy.spatial import cKDTree
import igraph as ig
from scipy.stats import skewnorm
from scipy.stats import skewnorm, circstd
import scipy.optimize as opt
import MDAnalysis as mda
import chilife
Expand Down Expand Up @@ -186,12 +186,18 @@ def __init__(self, res, site=None, protein=None, chain=None, rotlib=None, **kwar

def __str__(self):
return (
f"Rotamer ensemble with {np.size(self.weights)} members\n"
f" Name: {self.name}\n"
f" Label: {self.res}\n"
f" Site: {self.site}\n"
f"Rotamer ensemble with {np.size(self.weights)} members\n" +
f" Name: {self.name}\n" +
f" Label: {self.res}\n" +
f" Site: {self.site}\n" +
f" Dihedral definitions:\n" +
f"\n".join([f' {d}' for d in self.dihedral_atoms]) +
f"\n"
)

def __repr__(self):
    """Return the same human-readable summary that ``str()`` produces."""
    summary = str(self)
    return summary

@classmethod
def from_pdb(
cls,
Expand Down Expand Up @@ -249,7 +255,7 @@ def from_mda(cls, residue, **kwargs):
return cls(res, site, protein, chain, **kwargs)

@classmethod
def from_trajectory(cls, traj, site, chain=None, energy=None, burn_in=100, **kwargs):
def from_trajectory(cls, traj, site, chain=None, energy=None, burn_in=0, **kwargs):
"""
Create a RotamerEnsemble object from a trajectory.
Expand Down Expand Up @@ -294,7 +300,7 @@ def from_trajectory(cls, traj, site, chain=None, energy=None, burn_in=100, **kwa
res = traj.select_atoms(f"segid {chain} and resnum {site} and not altloc B")

resname = res.residues[0].resname
dihedral_defs = kwargs.get('dihedral_atoms', chilife.dihedral_defs.get(resname, ()))
dihedral_defs = kwargs.pop('dihedral_atoms', chilife.dihedral_defs.get(resname, ()))

traj = traj.universe if isinstance(traj, mda.AtomGroup) else traj
coords = np.array([res.atoms.positions for ts in traj.trajectory[burn_in:]])
Expand All @@ -313,12 +319,45 @@ def from_trajectory(cls, traj, site, chain=None, energy=None, burn_in=100, **kwa

pi /= pi.sum()
weights = pi
res = chilife.MolSys.from_atomsel(res, frames=unique_idx)
ICs = chilife.MolSysIC.from_atoms(res)

if not kwargs.setdefault('use_H', True):
res = res.select_atoms('not type H')

res = chilife.MolSys.from_atomsel(res, frames = unique_idx)

ICs = chilife.MolSysIC.from_atoms(res)
ICs.shift_resnum(-(site - 1))

if dihedral_defs == ():
# TODO: move code to a guess_dihedral_defs() function
sc_mask = ~np.isin(ICs.atom_names, ['N', 'CA', 'C', 'O', 'CB'])
ha_mask = ~(ICs.atom_types=='H')
mask = ha_mask * sc_mask
idxs = np.argwhere(mask).flatten()

#
cyverts = ICs.topology.ring_idxs
rotatable_bonds = {}
_idxs = []
for idx in idxs:
dihedral = ICs.z_matrix_idxs[idx]
bond = tuple(dihedral[1:3])

# Skip duplicate dihedral defs
if bond in rotatable_bonds:
continue

# Skip ring dihedrals
elif all(a in cyverts for a in bond):
continue

else:
rotatable_bonds[bond] = dihedral
_idxs.append(idx)

idxs = _idxs
dihedral_defs = [ICs.z_matrix_names[idx][::-1] for idx in idxs]

dihedrals = np.array([ic.get_dihedral(1, dihedral_defs) for ic in ICs])
sigmas = kwargs.get('sigmas', np.array([]))

Expand Down Expand Up @@ -357,7 +396,8 @@ def from_trajectory(cls, traj, site, chain=None, energy=None, burn_in=100, **kwa
lib['spin_weights'] = f['spin_weights']

kwargs.setdefault('eval_clash', False)
return cls(resname, site, traj, chain, lib, **kwargs)
kwargs.setdefault('_match_backbone', False)
return cls(resname, site, chain=chain, rotlib=lib, **kwargs)

def update(self, no_lib=False):
# Sample from library if requested
Expand Down Expand Up @@ -387,9 +427,9 @@ def update(self, no_lib=False):

if self.protein is not None:
self.protein_setup()

def protein_setup(self):
self.to_site()
self.backbone_to_site()

# Get weight of current or closest rotamer
clash_ignore_idx = self.protein.select_atoms(f"resid {self.site} and segid {self.chain}").ix
Expand Down Expand Up @@ -444,11 +484,20 @@ def to_rotlib(self,
if description is None:
description = (f'Rotamer library made with chiLife version {chilife.__version__} using `to_rotlib` method'
f'of a rotamer ensemble.')
ICs = self.internal_coords.copy()

# Remove chain operators to align all labels on backbone
# ICs.chain_operators = None
coords = ICs.protein.trajectory.coordinate_array
bb_idx = np.argwhere(np.isin(ICs.atom_names, ['N', 'CA', 'C'])).flatten()
for i in range(len(coords)):
ori, mx = chilife.local_mx(*coords[i, bb_idx])
coords[i] = (coords[i] - ori) @ mx

lib = {'rotlib': libname,
'resname': self.res,
'coords': self.coords,
'internal_coords': self.internal_coords,
'coords': coords,
'internal_coords': ICs,
'weights': self.weights,
'atom_types': self.atom_types.copy(),
'atom_names': self.atom_names.copy(),
Expand All @@ -459,7 +508,7 @@ def to_rotlib(self,
'description': description,
'comment': comment,
'reference': reference,
'format_version': 1.2}
'format_version': 1.3}

if hasattr(self, 'spin_atoms'):
lib['spin_atoms'] = self.spin_atoms
Expand Down Expand Up @@ -553,6 +602,8 @@ def to_site(self, site_pos: ArrayLike = None) -> None:

self._coords = np.einsum("ijk,kl->ijl", self._coords, cmx) + ori
self.ICs_to_site(ori, mx)
if self._match_backbone:
self.backbone_to_site()

def ICs_to_site(self, cori, cmx):
""" Modify the internal coordinates to be aligned with the site that the RotamerEnsemble is attached to"""
Expand Down Expand Up @@ -1163,7 +1214,8 @@ def dihedrals(self, dihedrals):
self.internal_coords = self.internal_coords.copy()
self.internal_coords.load_new(z_matrix)
self._coords = self.internal_coords.protein.trajectory.coordinate_array.copy()[:, self.ic_mask]
self.backbone_to_site()
if self._match_backbone:
self.backbone_to_site()

# Apply uniform weights
self.weights = np.ones(len(self._dihedrals))
Expand Down Expand Up @@ -1208,6 +1260,16 @@ def protein(self, protein):
raise ValueError("The input protein must be an instance of MDAnalysis.Universe, MDAnalysis.AtomGroup, or "
"chilife.MolSys")

def intra_fit(self):
    """
    Rigidly re-align every ensemble member so its backbone frame matches the
    frame computed from ``self.backbone``.

    A reference matrix/origin pair is computed from ``self.backbone`` with
    ``self.alignment_method``. The same method is applied to the backbone atoms
    of each member, and the member coordinates are shifted by their own origin,
    multiplied by the combined matrix and shifted to the reference origin.
    ``self._coords`` is overwritten with the result and ``self.ICs_to_site`` is
    called with the reference origin and matrix.
    """
    target = self.backbone

    # Reference frame: matrix and origin of the target backbone
    tmx, tori = self.alignment_method(*target)
    # Backbone coordinates of each member.
    # NOTE(review): np.squeeze would also drop the leading (member) axis if the
    # ensemble holds a single member, which would change what the loop below
    # iterates over -- confirm the expected shape of ``self.backbone_idx``.
    bbs = np.squeeze(self.coords[:,self.backbone_idx])
    # One (matrix, origin) pair per member, stacked into two arrays
    mxs, oris = [np.array(x) for x in zip(*[self.alignment_method(*bb) for bb in bbs])]
    # Combine each member's transposed matrix with the reference matrix
    mxs = mxs.transpose(0, 2, 1) @ tmx

    # Move each member to its own origin, rotate, then place on the reference origin
    self._coords = (self.coords - oris[:, None, :]) @ mxs + tori[None, None, :]
    # presumably keeps the internal coordinates consistent with the moved
    # Cartesian coordinates -- verify against ICs_to_site
    self.ICs_to_site(tori, tmx)


def get_sasa(self):
Expand Down Expand Up @@ -1279,8 +1341,9 @@ def assign_defaults(kwargs):
"alignment_method": "bisect",
"dihedral_sigmas": 35,
"weighted_sampling": False,
"eval_clash": False,
"eval_clash": True if not kwargs.get('minimize', False) else False,
"use_H": False,
'_match_backbone': True,
"_exclude_nb_interactions": kwargs.pop('exclude_nb_interactions', 3),
"_sample_size": kwargs.pop("sample", False),
"energy_func": chilife.get_lj_rep,
Expand Down
5 changes: 5 additions & 0 deletions src/chilife/SpinLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ def from_wizard(
prelib.weights /= prelib.weights.sum()
return prelib

def __str__(self):
    """Return the parent class summary followed by this label's spin atoms."""
    summary = super().__str__()
    return summary + f" spin atoms:\n {self.spin_atoms}"



def _base_copy(self, rotlib=None):
return chilife.SpinLabel(self.res, self.site, rotlib=rotlib, chain=self.chain)
18 changes: 18 additions & 0 deletions src/chilife/Topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,24 @@ def __init__(self, mol, bonds, **kwargs):
c1 = self.atoms[c].segid
self.dihedrals_by_resnum[c1, r1, n1, n2, n3, n4] = dihe

@property
def ring_idxs(self):
    """
    Sorted vertex indices of all atoms that take part in at least one cycle.

    The cycles come from ``self.graph.fundamental_cycles()``; each cycle is
    used to select edges with ``self.graph.es(...)``, and both endpoints of
    every selected edge are collected.

    Returns
    -------
    list
        Unique vertex indices in ascending order; empty when the graph has
        no cycles.
    """
    graph = self.graph
    ring_vertices = {
        vertex
        for cycle in graph.fundamental_cycles()
        for edge in graph.es(cycle)
        for vertex in edge.tuple
    }
    return sorted(ring_vertices)

@property
def has_rings(self):
    """
    Whether the topology contains at least one ring.

    Returns
    -------
    bool
        ``True`` when ``ring_idxs`` is non-empty, ``False`` otherwise.
    """
    # ``ring_idxs`` is a (possibly empty) list, so its truthiness is the answer.
    return bool(self.ring_idxs)


def get_zmatrix_dihedrals(self):
"""
Get the dihedral definitions for the z-matrix.
Expand Down
2 changes: 1 addition & 1 deletion src/chilife/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
from .protein_utils import *
from .scoring import *

__version__ = '1.0.0.dev5'
__version__ = '1.0.0.dev6'
3 changes: 2 additions & 1 deletion src/chilife/chilife.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,7 +1311,8 @@ def _print_rotlib_info(lib_file):
f"Dihedral definitions: ",
*[f' {d}' for d in lib['dihedral_atoms']],
f"Spin atoms: {lib.get('spin_atoms')}",
f"Number of atoms: {atom_counts}\n",
f"Number of atoms: {atom_counts}",
f"Number of heavy atoms: {np.sum(lib['atom_types'] != 'H')}",
f"Reference: {lib['reference']}",
f"chiLife rotlib format: {lib['format_version']}",
f"*"*80)]
Expand Down
Binary file modified src/chilife/data/rotamer_libraries/user_rotlibs/I1M_rotlib.npz
Binary file not shown.
Binary file modified src/chilife/data/rotamer_libraries/user_rotlibs/M1M_rotlib.npz
Binary file not shown.
Binary file modified src/chilife/data/rotamer_libraries/user_rotlibs/R1M_rotlib.npz
Binary file not shown.
Binary file modified src/chilife/data/rotamer_libraries/user_rotlibs/R7M_rotlib.npz
Binary file not shown.
Binary file modified src/chilife/data/rotamer_libraries/user_rotlibs/V1M_rotlib.npz
Binary file not shown.
12 changes: 8 additions & 4 deletions src/chilife/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,11 +533,15 @@ def write_labels(pdb_file: TextIO, *args: SpinLabel, KDE: bool = True, sorted: b
spin_centers = np.atleast_2d(label.spin_centers)

if KDE and len(spin_centers) > 5:
# Perform gaussian KDE to determine electron density
gkde = gaussian_kde(spin_centers.T, weights=label.weights)
try:
# Perform gaussian KDE to determine electron density
gkde = gaussian_kde(spin_centers.T, weights=label.weights)

# Map KDE density to pseudoatoms
vals = gkde.pdf(spin_centers.T)
# Map KDE density to pseudoatoms
vals = gkde.pdf(spin_centers.T)

except:
vals = label.weights

else:
vals = label.weights
Expand Down
31 changes: 31 additions & 0 deletions src/chilife/math_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import numpy as np

def normalize_angles(angles):
    """
    Wrap angles given in radians onto the principal interval [-pi, pi].

    Parameters
    ----------
    angles : float or array_like
        Angle(s) in radians; any real value is accepted.

    Returns
    -------
    float or numpy.ndarray
        The equivalent angle(s) in [-pi, pi], with the same shape as the
        input.
    """
    # arctan2 of the (sin, cos) pair recovers the angle on the principal branch
    return np.arctan2(np.sin(angles), np.cos(angles))


def angle_dist(angle1, angle2):
    """
    Signed angular difference ``angle1 - angle2`` wrapped onto [-pi, pi].

    Parameters
    ----------
    angle1 : float or array_like
        First angle(s) in radians.
    angle2 : float or array_like
        Second angle(s) in radians; must be broadcastable against ``angle1``.

    Returns
    -------
    float or numpy.ndarray
        The difference ``angle1 - angle2`` expressed as the equivalent angle in
        [-pi, pi].
    """
    # np.subtract accepts plain lists/tuples as well as arrays and scalars
    diff = np.subtract(angle1, angle2)
    # Wrap the raw difference back onto the principal branch
    return np.arctan2(np.sin(diff), np.cos(diff))

23 changes: 23 additions & 0 deletions tests/test_MathUtils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np
from chilife.math_utils import *


def test_normalize_angle():
    """Angles on and beyond the principal interval wrap to the expected values."""
    raw = [-np.pi, 0, np.pi, 3 * np.pi/2, 2*np.pi]
    expected = [-np.pi, 0, np.pi, -np.pi/2, 0]
    np.testing.assert_almost_equal(normalize_angles(raw), expected)


def test_angle_dist():
    """Distance to a zero reference equals the wrapped angle, before and after a common shift."""
    n_points = 100
    sweep = np.linspace(0, 2*np.pi, n_points)
    origin = np.zeros(n_points)
    expected = normalize_angles(sweep)

    np.testing.assert_almost_equal(angle_dist(sweep, origin), expected)

    # Rotate both the angles and the reference by 90 degrees
    quarter_turn = np.pi/2
    shifted_sweep = sweep + quarter_turn
    shifted_origin = origin + quarter_turn
    np.testing.assert_almost_equal(angle_dist(shifted_sweep, shifted_origin), expected)
Loading

0 comments on commit 8c265db

Please sign in to comment.