Skip to content

Commit

Permalink
Enable PointMutationExecutor input of solvated pdbs (#967)
Browse files Browse the repository at this point in the history
* enable input of solvated pdbs

* clarify docs

* remove line that creates openmm topology from mdtraj topology

* allow cif input files and add other fixes file loading

* remove file close lines

* add clarifying docs
:

* Apply suggestions from code review

Co-authored-by: Mike Henry <11765982+mikemhenry@users.noreply.github.com>
  • Loading branch information
zhang-ivy and mikemhenry authored Mar 29, 2022
1 parent 2c1ea09 commit 59c77fb
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 36 deletions.
109 changes: 73 additions & 36 deletions perses/app/relative_point_mutation_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def __init__(self,
allow_undefined_stereo_sdf=False,
extra_sidechain_map=None,
demap_CBs=False,
solvate=True,
water_model='tip3p',
ionic_strength=0.15 * unit.molar,
forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
Expand All @@ -115,8 +116,10 @@ def __init__(self,
"""
arguments
protein_filename : str
path to protein (to mutate); .pdb
path to protein (to mutate); .pdb, .cif
Note: if there are nonstandard residues, the PDB should contain the standard residue name but the atoms/positions should correspond to the nonstandard residue. E.g. if I want to include HID, the PDB should contain HIS for the residue name, but the atoms should correspond to the atoms present in HID. You can use openmm.app.Modeller.addHydrogens() to generate a PDB like this. The same is true for the ligand_input, if its a PDB.
Note: this can be the protein solute only or the solvated protein. if its the former, solvate should be set to True.
if its the latter, solvate should be set to False.
mutation_chain_id : str
name of the chain to be mutated
mutation_residue_id : str
Expand All @@ -132,7 +135,10 @@ def __init__(self,
whether to conduct an endstate validation of the HybridTopologyFactory. If using the RepartitionedHybridTopologyFactory,
endstate validation cannot and will not be conducted.
ligand_input : str, default None
path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb
path to ligand of interest (i.e. small molecule or protein)
Note: if this is not solvated, it should be the ligand alone (.sdf or .pdb or .cif) and solvate should be set to True.
if this is solvated, this should be the protein-ligand complex (.pdb or .cif) -- with the protein to be mutated first and
the ligand second in the file -- and solvate should be set to False.
ligand_index : int, default 0
which ligand to use
allow_undefined_stereo_sdf : bool, default False
Expand All @@ -141,6 +147,9 @@ def __init__(self,
map of new to old sidechain atom indices to add to the default map (by default, we only map backbone atoms and CBs)
demap_CBs : bool, default False
whether to remove CBs from the mapping
solvate : bool, default True
whether to solvate the protein/complex. If this is False, protein_filename and ligand_input should correspond
to the solvated protein PDB and solvated complex PDB, respectively.
water_model : str, default 'tip3p'
solvent model to use for solvation
ionic_strength : float * unit.molar, default 0.15 * unit.molar
Expand Down Expand Up @@ -179,32 +188,39 @@ def __init__(self,
from openeye import oechem

# First thing to do is load the apo protein to mutate...
protein_pdbfile = open(protein_filename, 'r')
protein_pdb = app.PDBFile(protein_pdbfile)
protein_pdbfile.close()
if protein_filename.endswith('pdb'):
protein_pdb = app.PDBFile(protein_filename)
elif protein_filename.endswith('cif'):
protein_pdb = app.PDBxFile(protein_filename)
else:
raise Exception("protein_filename file format is not supported. supported formats: .pdb, .cif")
protein_positions, protein_topology, protein_md_topology = protein_pdb.positions, protein_pdb.topology, md.Topology.from_openmm(protein_pdb.topology)
protein_topology = protein_md_topology.to_openmm()
protein_n_atoms = protein_md_topology.n_atoms

# Load the ligand, if present
molecules = []
if ligand_input:
if isinstance(ligand_input, str):
if ligand_input.endswith('.sdf'): # small molecule
ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index, allow_undefined_stereo=allow_undefined_stereo_sdf)
molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol), forcefield_generators.generateTopologyFromOEMol(ligand_mol)
ligand_md_topology = md.Topology.from_openmm(ligand_topology)
ligand_n_atoms = ligand_md_topology.n_atoms

if ligand_input.endswith('pdb'): # protein
ligand_pdbfile = open(ligand_input, 'r')
ligand_pdb = app.PDBFile(ligand_pdbfile)
ligand_pdbfile.close()
ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(
ligand_pdb.topology)
ligand_mol = createOEMolFromSDF(ligand_input, index=ligand_index, allow_undefined_stereo=allow_undefined_stereo_sdf)
molecules.append(Molecule.from_openeye(ligand_mol, allow_undefined_stereo=False))
ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_mol), forcefield_generators.generateTopologyFromOEMol(ligand_mol)
ligand_md_topology = md.Topology.from_openmm(ligand_topology)
ligand_n_atoms = ligand_md_topology.n_atoms

elif ligand_input.endswith('pdb'): # protein
ligand_pdb = app.PDBFile(ligand_input)
ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(ligand_pdb.topology)
ligand_n_atoms = ligand_md_topology.n_atoms

elif ligand_input.endswith('cif'): # protein
ligand_pdb = app.PDBxFile(ligand_input)
ligand_positions, ligand_topology, ligand_md_topology = ligand_pdb.positions, ligand_pdb.topology, md.Topology.from_openmm(ligand_pdb.topology)
ligand_n_atoms = ligand_md_topology.n_atoms

else:
raise Exception("ligand_input file format is not supported. supported formats: .sdf, .pdb, .cif")

elif isinstance(ligand_input, oechem.OEMol): # oemol object
molecules.append(Molecule.from_openeye(ligand_input, allow_undefined_stereo=False))
ligand_positions, ligand_topology = extractPositionsFromOEMol(ligand_input), forcefield_generators.generateTopologyFromOEMol(ligand_input)
Expand All @@ -215,18 +231,22 @@ def __init__(self,
_logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
return

# Now create a complex
complex_md_topology = protein_md_topology.join(ligand_md_topology)
complex_topology = complex_md_topology.to_openmm()
complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
complex_positions[:protein_n_atoms, :] = protein_positions
complex_positions[protein_n_atoms:, :] = ligand_positions

# Convert positions back to openmm vec3 objects
complex_positions_vec3 = []
for position in complex_positions:
complex_positions_vec3.append(openmm.Vec3(*position.value_in_unit_system(unit.md_unit_system)))
complex_positions = unit.Quantity(value=complex_positions_vec3, unit=unit.nanometer)
if solvate:
# Now create a complex
complex_md_topology = protein_md_topology.join(ligand_md_topology)
complex_topology = complex_md_topology.to_openmm()
complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
complex_positions[:protein_n_atoms, :] = protein_positions
complex_positions[protein_n_atoms:, :] = ligand_positions

# Convert positions back to openmm vec3 objects
complex_positions_vec3 = []
for position in complex_positions:
complex_positions_vec3.append(openmm.Vec3(*position.value_in_unit_system(unit.md_unit_system)))
complex_positions = unit.Quantity(value=complex_positions_vec3, unit=unit.nanometer)
else:
complex_topology = ligand_topology
complex_positions = ligand_positions

# Now for a system_generator
self.system_generator = SystemGenerator(forcefields=forcefield_files,
Expand All @@ -238,11 +258,27 @@ def __init__(self,
molecules=molecules,
cache=None)

# Solvate apo and complex...
apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
inputs = [apo_input]
# Solvate apo and complex (if necessary) and generate systems...
inputs = []
topology_list = [protein_topology]
positions_list = [protein_positions]
box_dimensions_list = [apo_box_dimensions]
if ligand_input:
inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))
topology_list.append(complex_topology)
positions_list.append(complex_positions)
box_dimensions_list.append(complex_box_dimensions)

for topology, positions, box_dimensions in zip(topology_list, positions_list, box_dimensions_list):
if solvate:
solvated_topology, solvated_positions = self._solvate(topology, positions, water_model, phase,
ionic_strength, box_dimensions)
else:
solvated_topology = topology
solvated_positions = unit.quantity.Quantity(value=np.array(
[list(atom_pos) for atom_pos in positions.value_in_unit_system(unit.md_unit_system)]),
unit=unit.nanometers)
solvated_system = self.system_generator.create_system(solvated_topology)
inputs.append([solvated_topology, solvated_positions, solvated_system])

geometry_engine = FFAllAngleGeometryEngine(metadata=None,
use_sterics=False,
Expand Down Expand Up @@ -304,6 +340,8 @@ def __init__(self,
_logger.info(f"Changed particle {idx}'s sigma from {sigma} to {new_sigma}")

# Only validate energy bookkeeping if the WT and proposed residues do not involve rings
# Note: We don't validate energies for geometry proposals involving ring amino acids because we insert biasing torsions
# for ring transformations (to ensure the amino acids are somewhat in the right geometry), which will corrupt the energy addition during energy validation.
old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
validate_bool = False if old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids else True
new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta,
Expand Down Expand Up @@ -582,6 +620,5 @@ def _solvate(self,

# Canonicalize the solvated positions: turn tuples into np.array
solvated_positions = unit.quantity.Quantity(value=np.array([list(atom_pos) for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)]), unit=unit.nanometers)
solvated_system = self.system_generator.create_system(solvated_topology)

return solvated_topology, solvated_positions, solvated_system
return solvated_topology, solvated_positions
26 changes: 26 additions & 0 deletions perses/tests/test_relative_point_mutation_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,29 @@ def test_PointMutationExecutor_endstate_validation():
flatten_exceptions=False,
conduct_endstate_validation=True,
)

def test_PointMutationExecutor_solvated():
from pkg_resources import resource_filename
import os
import tempfile
from openmm import app, unit

from perses.app.relative_point_mutation_setup import PointMutationExecutor
from perses.tests.test_topology_proposal import generate_atp

with tempfile.TemporaryDirectory() as temp_dir:
ala, system_generator = generate_atp(phase='solvent')
app.PDBFile.writeFile(ala.topology, ala.positions, open(os.path.join(temp_dir, "ala_solvated.pdb"), "w"), keepIds=True)

PointMutationExecutor(
os.path.join(temp_dir, "ala_solvated.pdb"),
"1",
"2",
"ASP",
solvate=False,
flatten_torsions=False,
flatten_exceptions=False,
conduct_endstate_validation=False
)


0 comments on commit 59c77fb

Please sign in to comment.