Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature charged ligands #10

Merged
merged 21 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion a3fe/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.3"
__version__ = "0.2.0"
1 change: 1 addition & 0 deletions a3fe/data/t4l_input/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Benzene and T4 lysozyme (T4L) input files for testing parameterisation. Benzene is small and quick to parameterise, which makes this system well suited to fast tests.
30 changes: 30 additions & 0 deletions a3fe/data/t4l_input/ligand.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
ligand.pdb
OpenBabel08292314213D

12 12 0 0 0 0 0 0 0 0999 V2000
-32.9690 6.1960 2.8770 C 0 0 0 0 0 0 0 0 0 0 0 0
-32.9450 7.0460 3.9730 C 0 0 0 0 0 0 0 0 0 0 0 0
-33.7190 6.7980 5.1130 C 0 0 0 0 0 0 0 0 0 0 0 0
-34.5400 5.6800 5.1430 C 0 0 0 0 0 0 0 0 0 0 0 0
-34.5450 4.8250 4.0440 C 0 0 0 0 0 0 0 0 0 0 0 0
-33.7870 5.0690 2.9150 C 0 0 0 0 0 0 0 0 0 0 0 0
-32.3899 6.3954 2.0464 H 0 0 0 0 0 0 0 0 0 0 0 0
-32.3412 7.8825 3.9474 H 0 0 0 0 0 0 0 0 0 0 0 0
-33.6793 7.4388 5.9209 H 0 0 0 0 0 0 0 0 0 0 0 0
-35.1351 5.4859 5.9635 H 0 0 0 0 0 0 0 0 0 0 0 0
-35.1357 3.9793 4.0739 H 0 0 0 0 0 0 0 0 0 0 0 0
-33.8263 4.4245 2.1100 H 0 0 0 0 0 0 0 0 0 0 0 0
1 6 2 0 0 0 0
1 2 1 0 0 0 0
1 7 1 0 0 0 0
2 3 2 0 0 0 0
2 8 1 0 0 0 0
3 4 1 0 0 0 0
3 9 1 0 0 0 0
4 10 1 0 0 0 0
5 4 2 0 0 0 0
5 11 1 0 0 0 0
6 5 1 0 0 0 0
6 12 1 0 0 0 0
M END
$$$$
2,639 changes: 2,639 additions & 0 deletions a3fe/data/t4l_input/protein.pdb

Large diffs are not rendered by default.

24 changes: 24 additions & 0 deletions a3fe/run/_simulation_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,30 @@ def failed_simulations(self) -> _List[SimulationRunner]:
for failure in sub_sim_runner.failed_simulations
]

def is_equilibrated(self, run_nos: _Optional[_List[int]] = None) -> bool:
    """
    Check whether this simulation runner is equilibrated.

    The check is delegated to every sub-simulation runner. This updates
    the _equilibrated and _equil_time attributes of the lambda windows,
    which are accessed by the equilibrated and equil_time properties.

    Parameters
    ----------
    run_nos : List[int], Optional, default=None
        A list of the run numbers to check for equilibration. If None, all runs are analysed.

    Returns
    -------
    equilibrated : bool
        True only if every sub-simulation runner reports that it is equilibrated.
    """
    # NOTE: this is a plain docstring on purpose. An f-string is an ordinary
    # expression, so Python would not store it as __doc__.
    run_nos = self._get_valid_run_nos(run_nos)

    # Build the full list before calling all(). A generator would
    # short-circuit on the first False and skip the remaining sub-runners,
    # leaving their stored equilibration state un-updated.
    sub_runner_results = [
        sub_sim_runner.is_equilibrated(run_nos=run_nos)
        for sub_sim_runner in self._sub_sim_runners
    ]
    return all(sub_runner_results)

@property
def equilibrated(self) -> float:
f"""Whether the {self.__class__.__name__} is equilibrated."""
Expand Down
10 changes: 10 additions & 0 deletions a3fe/run/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ def check_has_wat_and_box(system: _BSS._SireWrappers._system.System) -> None: #
raise ValueError("System does not have water.")


def get_single_mol(
    system: _BSS._SireWrappers._system.System, mol_name: str
) -> _BSS._SireWrappers._molecule.Molecule:  # type: ignore
    """
    Return the only molecule in a BSS system with the given residue name.

    Parameters
    ----------
    system : BioSimSpace system
        The system to search.
    mol_name : str
        Residue name used in the "resname" search query.

    Returns
    -------
    molecule
        The single molecule matched by the search.

    Raises
    ------
    ValueError
        If the search does not match exactly one molecule.
    """
    query = f"resname {mol_name}"
    matches = system.search(query).molecules()
    n_matches = len(matches)
    if n_matches == 1:
        return matches[0]
    raise ValueError(f"Expected 1 molecule with name {mol_name}, got {n_matches}")


def get_simtime(
sim_runner: "SimulationRunner", # noqa: F821
run_nos: _Optional[_List[int]] = None,
Expand Down
3 changes: 2 additions & 1 deletion a3fe/run/_virtual_queue.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilities for the Ensemble, Window, and Simulation Classes"""

import getpass as _getpass
import glob as _glob
import logging as _logging
import os as _os
Expand Down Expand Up @@ -201,7 +202,7 @@ def _read_slurm_queue_inner() -> _List[int]:
to the decorator"""
# Get job ids of currently running jobs. This assumes no array jobs.
commands = [
["squeue", "-h", "-u", _os.getenv("USER")],
["squeue", "-h", "-u", _getpass.getuser()],
["awk", "{print $1}"],
["grep", "-v", "-E", "'\\[|_'"],
["paste", "-s", "-d,", "-"],
Expand Down
20 changes: 20 additions & 0 deletions a3fe/run/lambda_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,26 @@ def get_tot_gpu_time(self, run_nos: _Optional[_List[int]] = None) -> float:
run_nos = self._get_valid_run_nos(run_nos)
return sum([self.sims[run_no - 1].get_tot_gpu_time() for run_no in run_nos])

def is_equilibrated(self, run_nos: _Optional[_List[int]] = None) -> bool:
    """
    Run equilibration detection for the ensemble of simulations at this
    lambda window and record the outcome.

    The window's equilibration detection method (check_equil) is called
    with the window itself and the requested run numbers. The resulting
    equilibration status and equilibration time are stored in the private
    _equilibrated and _equil_time attributes.

    Parameters
    ----------
    run_nos : List[int], Optional, default: None
        The run numbers to use for equilibration detection. If None, all runs will be used.

    Returns
    -------
    equilibrated : bool
        True if the simulation is equilibrated, False otherwise.
    """
    # check_equil is called with the window passed explicitly as its first argument.
    status, equil_time = self.check_equil(self, run_nos=run_nos)
    self._equilibrated = status
    self._equil_time = equil_time
    return self._equilibrated

@property
def equilibrated(self) -> bool:
"""Whether equilibration has been achieved."""
Expand Down
61 changes: 38 additions & 23 deletions a3fe/run/leg.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from . import system_prep as _system_prep
from ._restraint import A3feRestraint as _A3feRestraint
from ._simulation_runner import SimulationRunner as _SimulationRunner
from ._utils import get_single_mol as _get_single_mol
from ._virtual_queue import Job as _Job
from ._virtual_queue import VirtualQueue as _VirtualQueue
from .enums import LegType as _LegType
Expand Down Expand Up @@ -715,6 +716,31 @@ def write_input_files(
config: SystemPreparationConfig
Configuration object for the setup of the leg.
"""
# Get the charge of the ligand
lig = _get_single_mol(pre_equilibrated_system, "LIG")
lig_charge = round(lig.charge().value())

# If we have a charged ligand, make sure that SOMD is using PME
if lig_charge != 0:
try:
cuttoff_type = _read_simfile_option(
f"{self.input_dir}/template_config.cfg", "cutoff type"
)
except ValueError: # Will get this if the option is not present (but the default is not PME)
cuttoff_type = None
if cuttoff_type != "PME":
raise ValueError(
f"The ligand has a non-zero charge ({lig_charge}), so SOMD must use PME for the electrostatics. "
"Please set the 'cutoff type' option in the somd.cfg file to 'PME'."
)

self._logger.info(
f"Ligand has charge {lig_charge}. Using co-alchemical ion approach to maintain neutrality."
)

# Figure out where the ligand is in the system
perturbed_resnum = pre_equilibrated_system.getIndex(lig) + 1

# Dummy values get overwritten later
dummy_runtime = 0.001 # ns
dummy_lam_vals = [0.0]
Expand Down Expand Up @@ -775,14 +801,6 @@ def write_input_files(
# by BSS, as well as the restraints options
_shutil.copy(f"{self.input_dir}/template_config.cfg", stage_input_dir)

# Read simfile options
perturbed_resnum = _read_simfile_option(
f"{stage_input_dir}/somd.cfg", "perturbed residue number"
)
# Temporary fix for BSS bug - perturbed residue number is wrong, but since we always add the
# ligand first to the system, this should always be 1 anyway
# TODO: Fix this - raise BSS issue
perturbed_resnum = "1"
try:
use_boresch_restraints = _read_simfile_option(
f"{stage_input_dir}/somd.cfg", "use boresch restraints"
Expand All @@ -798,21 +816,18 @@ def write_input_files(
turn_on_receptor_ligand_restraints_mode = False

# Now write simfile options
_write_simfile_option(
f"{stage_input_dir}/template_config.cfg",
"perturbed residue number",
perturbed_resnum,
)
_write_simfile_option(
f"{stage_input_dir}/template_config.cfg",
"use boresch restraints",
str(use_boresch_restraints),
)
_write_simfile_option(
f"{stage_input_dir}/template_config.cfg",
"turn on receptor-ligand restraints mode",
str(turn_on_receptor_ligand_restraints_mode),
)
# Options to write to the template config, keyed by the exact SOMD option
# name. SOMD option names are space-separated, so the key must be
# "perturbed residue number" (no underscore).
options_to_write = {
    "perturbed residue number": perturbed_resnum,
    "use boresch restraints": use_boresch_restraints,
    "turn on receptor-ligand restraints mode": turn_on_receptor_ligand_restraints_mode,
    # This automatically uses the co-alchemical ion approach when there is a charge difference
    "charge difference": -lig_charge,
}

# Stringify every value at the point of writing so that booleans, integers
# and strings are all passed to the writer as text.
for option, value in options_to_write.items():
    _write_simfile_option(
        f"{stage_input_dir}/template_config.cfg", option, str(value)
    )

# Now overwrite the SOMD generated config file with the updated template
_subprocess.run(
Expand Down
6 changes: 3 additions & 3 deletions a3fe/run/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ def _select_input_files(self) -> None:
elif len(rst7_files) > 1:
# Rename the rst7 file for this run to somd.rst7 and delete any other
# rst7 files
self._logger.info("Multiple rst7 files found - renaming")
self._logger.debug("Multiple rst7 files found - renaming")
_subprocess.run(
[
"mv",
Expand Down Expand Up @@ -263,7 +263,7 @@ def _select_input_files(self) -> None:
for file in unwanted_rest_files:
_subprocess.run(["rm", file])
else:
self._logger.info("No restraint file found")
self._logger.debug("No restraint file found")

def _update_simfile(self) -> None:
"""Set the lambda value in the simulation file, as well as some
Expand Down Expand Up @@ -314,7 +314,7 @@ def _get_slurm_file_base(self) -> None:

slurm_file = _os.path.join(self.input_dir, "run_somd.sh")
self.slurm_file_base = _get_slurm_file_base(slurm_file)
self._logger.info(f"Found slurm output file basename: {self.slurm_file_base}")
self._logger.debug(f"Found slurm output file basename: {self.slurm_file_base}")

def run(self, runtime: float = 2.5) -> None:
"""
Expand Down
38 changes: 15 additions & 23 deletions a3fe/run/system_prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import BioSimSpace.Sandpit.Exscientia as _BSS
from pydantic import BaseModel as _BaseModel
from pydantic import Field as _Field
from pydantic import field_validator as _field_validator

from ..read._process_bss_systems import rename_lig as _rename_lig
from ._utils import check_has_wat_and_box as _check_has_wat_and_box
Expand All @@ -36,10 +35,6 @@ class SystemPreparationConfig(_BaseModel):
Whether to use SLURM for the preparation.
forcefields : dict
Forcefields to use for the ligand, protein, and water.
lig_net_charge : int
Net charge of the ligand. It is not recommended to use A3FE for charged ligands
as it uses reaction field electrostatics and does not keep the box netural or apply
corrections. Run at your own risk!
water_model : str
Water model to use.
ion_conc : float
Expand Down Expand Up @@ -77,7 +72,6 @@ class SystemPreparationConfig(_BaseModel):
"protein": "ff14SB",
"water": "tip3p",
}
lig_net_charge: int = _Field(0, ge=-5, le=5)
water_model: str = "tip3p"
ion_conc: float = _Field(0.15, ge=0, lt=1) # M
steps: int = _Field(1000, gt=0, lt=100_000) # This is the default for _BSS
Expand Down Expand Up @@ -182,18 +176,6 @@ class SystemPreparationConfig(_BaseModel):
},
}

@_field_validator("lig_net_charge")
def validate_lig_net_charge(lig_net_charge: int) -> int:
"""Validator for the ligand net charge."""
if lig_net_charge != 0:
# Raise a user warning if the ligand is charged
_warnings.warn(
"Warning: A3FE uses reaction field electrostatics and does not keep the box neutral or apply corrections. "
"This can lead to artefacts in the results. Run charged ligands at your own risk!"
)

return lig_net_charge

class Config:
"""
Pydantic model configuration.
Expand Down Expand Up @@ -316,11 +298,21 @@ def parameterise_input(
lig_sys = _BSS.IO.readMolecules(f"{input_dir}/ligand.sdf")
# Ensure that the ligand is named "LIG"
_rename_lig(lig_sys, "LIG")
param_lig = _BSS.Parameters.parameterise(
molecule=lig_sys[0],
forcefield=cfg.forcefields["ligand"],
net_charge=cfg.lig_net_charge,
).getMolecule()
# Check charge of the ligand
lig = lig_sys[0]
lig_charge = round(lig.charge().value())
if lig_charge != 0:
_warnings.warn(
f"Ligand has a charge of {lig_charge}. Co-alchemical ion approach will be used."
" Ensure that your box is large enough to avoid artefacts."
)

# Only include ligand charge if we're using gaff (OpenFF doesn't need it)
param_args = {"molecule": lig, "forcefield": cfg.forcefields["ligand"]}
if "gaff" in cfg.forcefields["ligand"]:
param_args["net_charge"] = lig_charge

param_lig = _BSS.Parameters.parameterise(**param_args).getMolecule()

# If bound, then parameterise the protein and waters and add to the system
if leg_type == _LegType.BOUND:
Expand Down
33 changes: 33 additions & 0 deletions a3fe/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,39 @@ def calc():
yield calc


@pytest.fixture(scope="session")
def t4l_calc():
    """
    Yield a calculation built from the quickly-parametrised T4L system.

    A temporary base directory is populated with the T4L structure files
    plus the run script and template config from the example run
    directory. The preparation stage is STRUCTURES_ONLY, and this is used
    for testing parameterisation. The temporary directory is removed once
    the fixture is torn down.
    """
    shared_inputs = ("run_somd.sh", "template_config.cfg")

    with TemporaryDirectory() as dirname:
        input_dir = os.path.join(dirname, "input")

        # Copy the T4L structure files; this creates the input directory
        subprocess.run(
            ["cp", "-r", "a3fe/data/t4l_input", input_dir],
            check=True,
        )

        # Copy the remaining input files from the example run directory
        for file in shared_inputs:
            source_path = os.path.join("a3fe/data/example_run_dir/input/", file)
            subprocess.run(["cp", source_path, input_dir], check=True)

        calc = a3.Calculation(base_dir=dirname)
        calc._dump()

        yield calc


@pytest.fixture(scope="session")
def complex_sys():
"""Create a complex system object to use in tests"""
Expand Down
Loading