diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..d38af27f --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +# Add hash of linting PR here diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..84d07967 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +ci: + autoupdate_schedule: "quarterly" +files: ^openmmforcefields/ +repos: +- repo: https://github.com/psf/black + rev: 23.9.1 + hooks: + - id: black +- repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort +- repo: https://github.com/PyCQA/flake8 + rev: 6.1.0 + hooks: + - id: flake8 diff --git a/openmmforcefields/__init__.py b/openmmforcefields/__init__.py index b2b935ee..4af8bdfc 100644 --- a/openmmforcefields/__init__.py +++ b/openmmforcefields/__init__.py @@ -1,12 +1,9 @@ # Add imports here # Handle versioneer +from openmmforcefields.utils import get_ffxml_path + from ._version import get_versions -from .utils import get_ffxml_path versions = get_versions() -__version__ = versions['version'] -__git_revision__ = versions['full-revisionid'] -del get_versions, versions - -from . import _version -__version__ = _version.get_versions()['version'] +__version__ = versions["version"] +__git_revision__ = versions["full-revisionid"] diff --git a/openmmforcefields/_version.py b/openmmforcefields/_version.py index 15bc1c11..c4a5f709 100644 --- a/openmmforcefields/_version.py +++ b/openmmforcefields/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -12,12 +11,12 @@ """Git implementation of _version.py.""" import errno +import functools import os import re import subprocess import sys from typing import Any, Callable, Dict, List, Optional, Tuple -import functools def get_keywords() -> Dict[str, str]: @@ -68,12 +67,14 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate @@ -100,10 +101,14 @@ def run_command( try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git - process = subprocess.Popen([command] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None), **popen_kwargs) + process = subprocess.Popen( + [command] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + **popen_kwargs, + ) break except OSError as e: if e.errno == errno.ENOENT: @@ -141,15 +146,21 @@ def versions_from_parentdir( for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -212,7 +223,7 @@ def git_versions_from_keywords( # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -221,7 +232,7 @@ def git_versions_from_keywords( # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = {r for r in refs if re.search(r'\d', r)} + tags = {r for r in refs if re.search(r"\d", r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -229,32 +240,36 @@ def git_versions_from_keywords( for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') - if not re.match(r'\d', r): + if not re.match(r"\d", r): continue if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( - tag_prefix: str, - root: str, - verbose: bool, - runner: Callable = run_command + tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. @@ -273,8 +288,7 @@ def git_pieces_from_vcs( env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) - _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=not verbose) + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -282,10 +296,19 @@ def git_pieces_from_vcs( # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = runner(GITS, [ - "describe", "--tags", "--dirty", "--always", "--long", - "--match", f"{tag_prefix}[[:digit:]]*" - ], cwd=root) + describe_out, rc = runner( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + f"{tag_prefix}[[:digit:]]*", + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -300,8 +323,7 @@ def git_pieces_from_vcs( pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None - branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], - cwd=root) + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") @@ -341,17 +363,16 @@ def git_pieces_from_vcs( dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? - pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -360,10 +381,12 @@ def git_pieces_from_vcs( if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -412,8 +435,7 @@ def render_pep440(pieces: Dict[str, Any]) -> str: rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -442,8 +464,7 @@ def render_pep440_branch(pieces: Dict[str, Any]) -> str: rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" - rendered += "+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -604,11 +625,13 @@ def render_git_describe_long(pieces: Dict[str, Any]) -> str: def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -632,9 +655,13 @@ def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions() -> Dict[str, Any]: @@ -648,8 +675,7 @@ def get_versions() -> Dict[str, Any]: verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -658,13 +684,16 @@ def get_versions() -> Dict[str, Any]: # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. - for _ in cfg.versionfile_source.split('/'): + for _ in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -678,6 +707,10 @@ def get_versions() -> Dict[str, Any]: except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/openmmforcefields/data/perses_jacs_systems/tests/test_outputs.py b/openmmforcefields/data/perses_jacs_systems/tests/test_outputs.py index 3edfe94e..9b012cd9 100644 --- a/openmmforcefields/data/perses_jacs_systems/tests/test_outputs.py +++ b/openmmforcefields/data/perses_jacs_systems/tests/test_outputs.py @@ -4,12 +4,20 @@ from openmm import app if __name__ == "__main__": - parser = ArgumentParser(epilog='''This program reads a pdb file and tests whether the file that is compatible - with the pdb parsing capabilities in openmm. The first argument should be the - pdb file to be used for testing.''') + parser = ArgumentParser( + epilog="""This program reads a pdb file and tests whether the file that is compatible + with the pdb parsing capabilities in openmm. The first argument should be the + pdb file to be used for testing.""" + ) group = parser.add_argument_group("Required Arguments") - group.add_argument('-p', '--protein', default=None, dest='protein', help='Protein pdb file to test compatibility' - 'with openmm', required=True) + group.add_argument( + "-p", + "--protein", + default=None, + dest="protein", + help="Protein pdb file to test compatibility" "with openmm", + required=True, + ) args = parser.parse_args() pdb = app.PDBFile(args.protein) modeller = app.Modeller(pdb.topology, pdb.positions) @@ -20,6 +28,12 @@ modeller.addHydrogens(forcefield=forcefield) except ValueError as error: print(error) - sys.exit("File %s was not properly prepared and cannot be parsed by openmm" % args.protein) + sys.exit( + "File %s was not properly prepared and cannot be parsed by openmm" + % args.protein + ) - sys.exit("Test passed! Output %s was prepared properly and can be parsed by openmm" % args.protein) + sys.exit( + "Test passed! Output %s was prepared properly and can be parsed by openmm" + % args.protein + ) diff --git a/openmmforcefields/generators/system_generators.py b/openmmforcefields/generators/system_generators.py index 971ffb7b..2d7dac3d 100644 --- a/openmmforcefields/generators/system_generators.py +++ b/openmmforcefields/generators/system_generators.py @@ -15,14 +15,18 @@ # System generator base class ################################################################################ + class classproperty(property): def __get__(self, obj, objtype=None): return super().__get__(objtype) + def __set__(self, obj, value): super().__set__(type(obj), value) + def __delete__(self, obj): super().__delete__(type(obj)) + class SystemGenerator: """ Common interface for generating OpenMM Systems from OpenMM Topology objects @@ -65,7 +69,20 @@ class SystemGenerator: postprocess_system : method If not None, this method will be called as ``system = postprocess_system(system)`` to post-process the System object for create_system(topology) before it is returned. """ - def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', forcefield_kwargs=None, nonperiodic_forcefield_kwargs=None, periodic_forcefield_kwargs=None, template_generator_kwargs=None, barostat=None, molecules=None, cache=None, postprocess_system=None): + + def __init__( + self, + forcefields=None, + small_molecule_forcefield="openff-1.0.0", + forcefield_kwargs=None, + nonperiodic_forcefield_kwargs=None, + periodic_forcefield_kwargs=None, + template_generator_kwargs=None, + barostat=None, + molecules=None, + cache=None, + postprocess_system=None, + ): """ This is a utility class to generate OpenMM Systems from Open Force Field Topology objects using AMBER protein force fields and GAFF small molecule force fields. @@ -166,14 +183,16 @@ def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', f """ # Initialize - self.barostat = barostat # barostat to copy, or None if no barostat is to be added + self.barostat = ( + barostat # barostat to copy, or None if no barostat is to be added + ) # Post-creation system transformations - self.particle_charges = True # include particle charges - self.exception_charges = True # include electrostatics nonzero exceptions - self.particle_epsilons = True # include LJ particles - self.exception_epsilons = True # include LJ nonzero exceptions - self.torsions = True # include torsions + self.particle_charges = True # include particle charges + self.exception_charges = True # include electrostatics nonzero exceptions + self.particle_epsilons = True # include LJ particles + self.exception_epsilons = True # include LJ nonzero exceptions + self.torsions = True # include torsions # Method to use for postprocessing system self.postprocess_system = postprocess_system @@ -181,39 +200,65 @@ def __init__(self, forcefields=None, small_molecule_forcefield='openff-1.0.0', f # Create OpenMM ForceField object forcefields = forcefields if (forcefields is not None) else list() from openmm import app + self.forcefield = app.ForceField(*forcefields) # Cache force fields and settings to use - self.forcefield_kwargs = forcefield_kwargs if forcefield_kwargs is not None else dict() - self.nonperiodic_forcefield_kwargs = nonperiodic_forcefield_kwargs if nonperiodic_forcefield_kwargs is not None else {'nonbondedMethod' : app.NoCutoff} - self.periodic_forcefield_kwargs = periodic_forcefield_kwargs if periodic_forcefield_kwargs is not None else {'nonbondedMethod' : app.PME} + self.forcefield_kwargs = ( + forcefield_kwargs if forcefield_kwargs is not None else dict() + ) + self.nonperiodic_forcefield_kwargs = ( + nonperiodic_forcefield_kwargs + if nonperiodic_forcefield_kwargs is not None + else {"nonbondedMethod": app.NoCutoff} + ) + self.periodic_forcefield_kwargs = ( + periodic_forcefield_kwargs + if periodic_forcefield_kwargs is not None + else {"nonbondedMethod": app.PME} + ) self.template_generator_kwargs = template_generator_kwargs # Raise an exception if nonbondedForce is specified in forcefield_kwargs - if 'nonbondedMethod' in self.forcefield_kwargs: - raise ValueError("""nonbondedMethod cannot be specified in forcefield_kwargs; + if "nonbondedMethod" in self.forcefield_kwargs: + raise ValueError( + """nonbondedMethod cannot be specified in forcefield_kwargs; must be specified in either periodic_forcefield_kwargs (if it should be applied to periodic systems) - or nonperiodic_forcefield_kwargs (if it should be applied to non-periodic systems)""") + or nonperiodic_forcefield_kwargs (if it should be applied to non-periodic systems)""" + ) # Create and cache a residue template generator from openmmforcefields.generators.template_generators import ( SmallMoleculeTemplateGenerator, ) + self.template_generator = None if small_molecule_forcefield is not None: - for template_generator_cls in SmallMoleculeTemplateGenerator.__subclasses__(): + for ( + template_generator_cls + ) in SmallMoleculeTemplateGenerator.__subclasses__(): try: - _logger.debug(f'Trying {template_generator_cls.__name__} to load {small_molecule_forcefield}') - self.template_generator = template_generator_cls(forcefield=small_molecule_forcefield, cache=cache, template_generator_kwargs=self.template_generator_kwargs) + _logger.debug( + f"Trying {template_generator_cls.__name__} to load {small_molecule_forcefield}" + ) + self.template_generator = template_generator_cls( + forcefield=small_molecule_forcefield, + cache=cache, + template_generator_kwargs=self.template_generator_kwargs, + ) break except ValueError as e: - _logger.debug(f' {template_generator_cls.__name__} cannot load {small_molecule_forcefield}') + _logger.debug( + f" {template_generator_cls.__name__} cannot load {small_molecule_forcefield}" + ) _logger.debug(e) if self.template_generator is None: msg = f"No registered small molecule template generators could load force field '{small_molecule_forcefield}'\n" - msg += f"Available installed force fields are:\n" - for template_generator_cls in SmallMoleculeTemplateGenerator.__subclasses__(): - msg += f' {template_generator_cls.__name__}: {template_generator_cls.INSTALLED_FORCEFIELDS}\n' + msg += "Available installed force fields are:\n" + for ( + template_generator_cls + ) in SmallMoleculeTemplateGenerator.__subclasses__(): + msg += f" {template_generator_cls.__name__}: {template_generator_cls.INSTALLED_FORCEFIELDS}\n" raise ValueError(msg) self.forcefield.registerTemplateGenerator(self.template_generator.generator) @@ -227,6 +272,7 @@ def SMALL_MOLECULE_FORCEFIELDS(cls): from openmmforcefields.generators.template_generators import ( SmallMoleculeTemplateGenerator, ) + for template_generator_cls in SmallMoleculeTemplateGenerator.__subclasses__(): forcefields += template_generator_cls.INSTALLED_FORCEFIELDS return forcefields @@ -245,7 +291,9 @@ def add_molecules(self, molecules): """ if self.template_generator is None: - raise ValueError("You must have a small molecule residue template generator registered to add small molecules") + raise ValueError( + "You must have a small molecule residue template generator registered to add small molecules" + ) self.template_generator.add_molecules(molecules) @@ -257,11 +305,12 @@ def _modify_forces(self, system): if (self.barostat is not None) and system.usesPeriodicBoundaryConditions(): import numpy as np import openmm + MAXINT = np.iinfo(np.int32).max # Determine pressure, temperature, and frequency pressure = self.barostat.getDefaultPressure() - if hasattr(self.barostat, 'getDefaultTemperature'): + if hasattr(self.barostat, "getDefaultTemperature"): temperature = self.barostat.getDefaultTemperature() else: temperature = self.barostat.getTemperature() @@ -276,7 +325,7 @@ def _modify_forces(self, system): # Modify forces if requested for force in system.getForces(): - if force.__class__.__name__ == 'NonbondedForce': + if force.__class__.__name__ == "NonbondedForce": for index in range(force.getNumParticles()): charge, sigma, epsilon = force.getParticleParameters(index) if not self.particle_charges: @@ -285,18 +334,26 @@ def _modify_forces(self, system): epsilon *= 0 force.setParticleParameters(index, charge, sigma, epsilon) for index in range(force.getNumExceptions()): - p1, p2, chargeProd, sigma, epsilon = force.getExceptionParameters(index) + p1, p2, chargeProd, sigma, epsilon = force.getExceptionParameters( + index + ) if not self.exception_charges: chargeProd *= 0 if not self.exception_epsilons: epsilon *= 0 - force.setExceptionParameters(index, p1, p2, chargeProd, sigma, epsilon) - elif force.__class__.__name__ == 'PeriodicTorsionForce': + force.setExceptionParameters( + index, p1, p2, chargeProd, sigma, epsilon + ) + elif force.__class__.__name__ == "PeriodicTorsionForce": for index in range(force.getNumTorsions()): - p1, p2, p3, p4, periodicity, phase, K = force.getTorsionParameters(index) + p1, p2, p3, p4, periodicity, phase, K = force.getTorsionParameters( + index + ) if not self.torsions: K *= 0 - force.setTorsionParameters(index, p1, p2, p3, p4, periodicity, phase, K) + force.setTorsionParameters( + index, p1, p2, p3, p4, periodicity, phase, K + ) def create_system(self, topology, molecules=None): """ @@ -326,6 +383,7 @@ def create_system(self, topology, molecules=None): # Build the kwargs to use import copy + forcefield_kwargs = copy.deepcopy(self.forcefield_kwargs) if topology.getPeriodicBoxVectors() is None: forcefield_kwargs.update(self.nonperiodic_forcefield_kwargs) @@ -344,10 +402,12 @@ def create_system(self, topology, molecules=None): return system + ################################################################################ # Dummy system generator ################################################################################ + class DummySystemGenerator(SystemGenerator): """ Dummy force field that can add basic parameters to any system for testing purposes. @@ -362,6 +422,7 @@ class DummySystemGenerator(SystemGenerator): * Torsions are added with periodicity 3, but no barrier height """ + def create_system(self, topology, **kwargs): """ Create a System object with simple parameters from the provided Topology @@ -381,8 +442,11 @@ def create_system(self, topology, **kwargs): """ # TODO: Allow periodicity to be determined from topology + import openmm + from openmm import unit from openmmtools.constants import kB - kT = kB * 300*unit.kelvin # hard-coded temperature for setting energy scales + + kT = kB * 300 * unit.kelvin # hard-coded temperature for setting energy scales # Create a System system = openmm.System() @@ -394,9 +458,9 @@ def create_system(self, topology, **kwargs): # Add simple repulsive interactions # TODO: Use softcore repulsive interaction; Gaussian times switch? - nonbonded = openmm.CustomNonbondedForce('100/(r/0.1)^4') - nonbonded.setNonbondedMethod(openmm.CustomNonbondedForce.CutoffNonPeriodic); - nonbonded.setCutoffDistance(1*unit.nanometer) + nonbonded = openmm.CustomNonbondedForce("100/(r/0.1)^4") + nonbonded.setNonbondedMethod(openmm.CustomNonbondedForce.CutoffNonPeriodic) + nonbonded.setCutoffDistance(1 * unit.nanometer) system.addForce(nonbonded) for atom in topology.atoms: nonbonded.addParticle([]) @@ -405,7 +469,7 @@ def create_system(self, topology, **kwargs): bondedToAtom = [] for atom in topology.atoms(): bondedToAtom.append(set()) - for (atom1, atom2) in topology.bonds(): + for atom1, atom2 in topology.bonds(): bondedToAtom[atom1.index].add(atom2.index) bondedToAtom[atom2.index].add(atom1.index) return bondedToAtom @@ -435,11 +499,13 @@ def create_system(self, topology, **kwargs): else: uniqueAngles.add((atom, bond.atom2, bond.atom1)) angles = sorted(list(uniqueAngles)) - theta0 = 109.5 * unit.degrees # TODO: Adapt based on number of bonds to each atom? + theta0 = ( + 109.5 * unit.degrees + ) # TODO: Adapt based on number of bonds to each atom? sigma_theta = 10 * unit.degrees Ktheta = kT / sigma_theta**2 angle_force = openmm.HarmonicAngleForce() - for (atom1, atom2, atom3) in angles: + for atom1, atom2, atom3 in angles: angles.addAngle(atom1.index, atom2.index, atom3.index, theta0, Ktheta) system.addForce(angle_force) @@ -463,8 +529,16 @@ def create_system(self, topology, **kwargs): periodicity = 3 phase = 0.0 * unit.degrees Kphi = 0.0 * kT - for (atom1, atom2, atom3, atom4) in propers: - torsion_force.add_torsion(atom1.index, atom2.index, atom3.index, atom4.index, periodicity, phase, Kphi) + for atom1, atom2, atom3, atom4 in propers: + torsion_force.add_torsion( + atom1.index, + atom2.index, + atom3.index, + atom4.index, + periodicity, + phase, + Kphi, + ) system.addForce(torsion_force) return system diff --git a/openmmforcefields/generators/template_generators.py b/openmmforcefields/generators/template_generators.py index a7413751..2dfa28ed 100644 --- a/openmmforcefields/generators/template_generators.py +++ b/openmmforcefields/generators/template_generators.py @@ -22,9 +22,11 @@ # Small molecule OpenMM ForceField template generation utilities ################################################################################ + class ForceException(Exception): """Exception for forces""" + class SmallMoleculeTemplateGenerator: """ Abstract base class for small molecule template generation for OpenMM ForceField. @@ -38,6 +40,7 @@ class SmallMoleculeTemplateGenerator: If not None, the generated ffxml file will be written to this filename. Default is None. """ + def __init__(self, molecules=None, cache=None): """ Create a tempalte generator with some OpenFF toolkit molecules @@ -61,11 +64,15 @@ def __init__(self, molecules=None, cache=None): # Set up cache self._cache = cache - self._smiles_added_to_db = set() # set of SMILES added to the database this session - self._database_table_name = None # this must be set by subclasses for cache to function + self._smiles_added_to_db = ( + set() + ) # set of SMILES added to the database this session + self._database_table_name = ( + None # this must be set by subclasses for cache to function + ) # Name of the force field - self._forcefield = None # this must be set by subclasses + self._forcefield = None # this must be set by subclasses # File to write ffxml to if requested self.debug_ffxml_filename = None @@ -77,10 +84,14 @@ def forcefield(self): @contextlib.contextmanager def _open_db(self): - """Open the cache database. - """ + """Open the cache database.""" from tinydb import TinyDB - tinydb_kwargs = { 'sort_keys' : True, 'indent' : 4, 'separators' : (',', ': ') } # for pretty-printing + + tinydb_kwargs = { + "sort_keys": True, + "indent": 4, + "separators": (",", ": "), + } # for pretty-printing db = TinyDB(self._cache, **tinydb_kwargs) try: yield db @@ -117,17 +128,20 @@ def add_molecules(self, molecules=None): # Ensure molecules is an iterable try: - iterator = iter(molecules) - except TypeError as te: - molecules = [ molecules ] + iter(molecules) + except TypeError: + molecules = [molecules] # Create copies # TODO: Do we need to try to construct molecules with other schemes, such as Molecule.from_smiles(), if needed? import copy - molecules = [ copy.deepcopy(molecule) for molecule in molecules ] + + molecules = [copy.deepcopy(molecule) for molecule in molecules] # Cache molecules - self._molecules.update( { molecule.to_smiles() : molecule for molecule in molecules } ) + self._molecules.update( + {molecule.to_smiles(): molecule for molecule in molecules} + ) @staticmethod def _match_residue(residue, molecule_template): @@ -159,15 +173,21 @@ def _match_residue(residue, molecule_template): import networkx as nx # Build list of external bonds for residue - number_of_external_bonds = { atom : 0 for atom in residue.atoms() } + number_of_external_bonds = {atom: 0 for atom in residue.atoms()} for bond in residue.external_bonds(): - if bond[0] in number_of_external_bonds: number_of_external_bonds[bond[0]] += 1 - if bond[1] in number_of_external_bonds: number_of_external_bonds[bond[1]] += 1 + if bond[0] in number_of_external_bonds: + number_of_external_bonds[bond[0]] += 1 + if bond[1] in number_of_external_bonds: + number_of_external_bonds[bond[1]] += 1 # Residue graph residue_graph = nx.Graph() for atom in residue.atoms(): - residue_graph.add_node(atom, element=atom.element.atomic_number, number_of_external_bonds=number_of_external_bonds[atom]) + residue_graph.add_node( + atom, + element=atom.element.atomic_number, + number_of_external_bonds=number_of_external_bonds[atom], + ) for bond in residue.internal_bonds(): residue_graph.add_edge(bond[0], bond[1]) @@ -176,28 +196,45 @@ def _match_residue(residue, molecule_template): # See https://docs.eyesopen.com/toolkits/python/oechemtk/OEChemClasses/OEAtomBase.html template_graph = nx.Graph() for atom_index, atom in enumerate(molecule_template.atoms): - template_graph.add_node(atom_index, element=atom.atomic_number, number_of_external_bonds=0) + template_graph.add_node( + atom_index, element=atom.atomic_number, number_of_external_bonds=0 + ) for bond in molecule_template.bonds: template_graph.add_edge(bond.atom1_index, bond.atom2_index) # DEBUG - #print(f'residue_graph: nodes {len(list(residue_graph.nodes))} edges {len(list(residue_graph.edges))}') - #print(f'template_graph: nodes {len(list(template_graph.nodes))} edges {len(list(template_graph.edges))}') + # print(f'residue_graph: nodes {len(list(residue_graph.nodes))} edges {len(list(residue_graph.edges))}') + # print(f'template_graph: nodes {len(list(template_graph.nodes))} edges {len(list(template_graph.edges))}') # Determine graph isomorphism from networkx.algorithms import isomorphism + def node_match(n1, n2): """Return True of nodes match, and False if not""" - return (n1['element']==n2['element']) and (n1['number_of_external_bonds']==n2['number_of_external_bonds']) - graph_matcher = isomorphism.GraphMatcher(residue_graph, template_graph, node_match=node_match) - if graph_matcher.is_isomorphic() == False: + return (n1["element"] == n2["element"]) and ( + n1["number_of_external_bonds"] == n2["number_of_external_bonds"] + ) + + graph_matcher = isomorphism.GraphMatcher( + residue_graph, template_graph, node_match=node_match + ) + if not graph_matcher.is_isomorphic(): return None # Translate to local residue atom indices # TODO: This can be simplified because molecule_template uses atom index as key - atom_index_within_residue = { atom : index for (index, atom) in enumerate(residue.atoms()) } - atom_index_within_template = { index : index for (index, atom) in enumerate(molecule_template.atoms) } - matches = { atom_index_within_residue[residue_atom] : atom_index_within_template[template_atom] for (residue_atom, template_atom) in graph_matcher.mapping.items() } + atom_index_within_residue = { + atom: index for (index, atom) in enumerate(residue.atoms()) + } + atom_index_within_template = { + index: index for (index, atom) in enumerate(molecule_template.atoms) + } + matches = { + atom_index_within_residue[residue_atom]: atom_index_within_template[ + template_atom + ] + for (residue_atom, template_atom) in graph_matcher.mapping.items() + } return matches @@ -276,13 +313,15 @@ def generator(self, forcefield, residue): """ if self._database_table_name is None: - raise NotImplementedError('SmallMoleculeTemplateGenerator is an abstract base class and cannot be used directly.') + raise NotImplementedError( + "SmallMoleculeTemplateGenerator is an abstract base class and cannot be used directly." + ) from io import StringIO # TODO: Refactor to reduce code duplication - _logger.info(f'Requested to generate parameters for residue {residue}') + _logger.info(f"Requested to generate parameters for residue {residue}") # If a database is specified, check against molecules in the database if self._cache is not None: @@ -290,20 +329,25 @@ def generator(self, forcefield, residue): table = db.table(self._database_table_name) for entry in table: # Skip any molecules we've added to the database this session - if entry['smiles'] in self._smiles_added_to_db: + if entry["smiles"] in self._smiles_added_to_db: continue # See if the template matches from openff.toolkit import Molecule - molecule_template = Molecule.from_smiles(entry['smiles'], allow_undefined_stereo=True) + + molecule_template = Molecule.from_smiles( + entry["smiles"], allow_undefined_stereo=True + ) _logger.debug(f"Checking against {entry['smiles']}") if self._match_residue(residue, molecule_template): - ffxml_contents = entry['ffxml'] + ffxml_contents = entry["ffxml"] # Write to debug file if requested if self.debug_ffxml_filename is not None: - with open(self.debug_ffxml_filename, 'w') as outfile: - _logger.debug(f'writing ffxml to {self.debug_ffxml_filename}') + with open(self.debug_ffxml_filename, "w") as outfile: + _logger.debug( + f"writing ffxml to {self.debug_ffxml_filename}" + ) outfile.write(ffxml_contents) # Add parameters and residue template for this residue @@ -320,8 +364,8 @@ def generator(self, forcefield, residue): # Write to debug file if requested if self.debug_ffxml_filename is not None: - with open(self.debug_ffxml_filename, 'w') as outfile: - _logger.debug(f'writing ffxml to {self.debug_ffxml_filename}') + with open(self.debug_ffxml_filename, "w") as outfile: + _logger.debug(f"writing ffxml to {self.debug_ffxml_filename}") outfile.write(ffxml_contents) # Add the parameters and residue definition @@ -330,12 +374,14 @@ def generator(self, forcefield, residue): if self._cache is not None: with self._open_db() as db: table = db.table(self._database_table_name) - _logger.debug(f'Writing residue template for {smiles} to cache {self._cache}') - record = {'smiles' : smiles, 'ffxml' : ffxml_contents} + _logger.debug( + f"Writing residue template for {smiles} to cache {self._cache}" + ) + record = {"smiles": smiles, "ffxml": ffxml_contents} # Add the IUPAC name for convenience if we can try: - record['iupac'] = molecule.to_iupac() - except Exception as e: + record["iupac"] = molecule.to_iupac() + except Exception: pass # Store the record table.insert(record) @@ -345,13 +391,17 @@ def generator(self, forcefield, residue): return True # Report that we have failed to parameterize the residue - _logger.warning(f'Did not recognize residue {residue.name}; did you forget to call .add_molecules() to add it?') + _logger.warning( + f"Did not recognize residue {residue.name}; did you forget to call .add_molecules() to add it?" + ) return False + ################################################################################ # GAFF-specific OpenMM ForceField template generation utilities ################################################################################ + class GAFFTemplateGenerator(SmallMoleculeTemplateGenerator): """ OpenMM ForceField residue template generator for GAFF/AM1-BCC using pre-cached OpenFF toolkit molecules. @@ -399,7 +449,14 @@ class GAFFTemplateGenerator(SmallMoleculeTemplateGenerator): ['gaff-1.4', 'gaff-1.8', 'gaff-1.81', 'gaff-2.1', 'gaff-2.11'] """ - INSTALLED_FORCEFIELDS = ['gaff-1.4', 'gaff-1.8', 'gaff-1.81', 'gaff-2.1', 'gaff-2.11'] + + INSTALLED_FORCEFIELDS = [ + "gaff-1.4", + "gaff-1.8", + "gaff-1.81", + "gaff-2.1", + "gaff-2.11", + ] def __init__(self, molecules=None, forcefield=None, cache=None, **kwargs): """ @@ -473,25 +530,31 @@ def __init__(self, molecules=None, forcefield=None, cache=None, **kwargs): forcefield = self.INSTALLED_FORCEFIELDS[-1] # Ensure a valid GAFF version is specified - if not forcefield in self.INSTALLED_FORCEFIELDS: - raise ValueError(f"Specified 'forcefield' ({forcefield}) must be one of {self.INSTALLED_FORCEFIELDS}") + if forcefield not in self.INSTALLED_FORCEFIELDS: + raise ValueError( + f"Specified 'forcefield' ({forcefield}) must be one of {self.INSTALLED_FORCEFIELDS}" + ) # Store user-specified GAFF version self._forcefield = forcefield import re - result = re.match(r'^gaff-(?P\d+)\.(?P\d+)$', forcefield) + + result = re.match( + r"^gaff-(?P\d+)\.(?P\d+)$", forcefield + ) if result is None: msg = "'forcefield' must be of form 'gaff-X.Y', where X and Y denote major and minor version\n" msg += f"Provided 'forcefield' argument was '{forcefield}'\n" msg += f"Supported values are: {self.INSTALLED_FORCEFIELDS}" raise ValueError(msg) - self._gaff_major_version = result['major_version'] - self._gaff_minor_version = result['minor_version'] - self._gaff_version = f'{self._gaff_major_version}.{self._gaff_minor_version}' + self._gaff_major_version = result["major_version"] + self._gaff_minor_version = result["minor_version"] + self._gaff_version = f"{self._gaff_major_version}.{self._gaff_minor_version}" # Track parameters by GAFF version string # TODO: Use file hash instead of name? import os + self._database_table_name = os.path.basename(forcefield) # Track which OpenMM ForceField objects have loaded the relevant GAFF parameters @@ -500,7 +563,7 @@ def __init__(self, molecules=None, forcefield=None, cache=None, **kwargs): @property def gaff_version(self): """The current GAFF version in use""" - return self._gaff_major_version + '.' + self._gaff_minor_version + return self._gaff_major_version + "." + self._gaff_minor_version @property def gaff_major_version(self): @@ -516,14 +579,22 @@ def gaff_minor_version(self): def gaff_dat_filename(self): """File path to the GAFF .dat AMBER force field file""" from pkg_resources import resource_filename - filename = resource_filename('openmmforcefields', os.path.join('ffxml', 'amber', 'gaff', 'dat', f'{self._forcefield}.dat')) + + filename = resource_filename( + "openmmforcefields", + os.path.join("ffxml", "amber", "gaff", "dat", f"{self._forcefield}.dat"), + ) return filename @property def gaff_xml_filename(self): """File path to the GAFF .ffxml OpenMM force field file""" from pkg_resources import resource_filename - filename = resource_filename('openmmforcefields', os.path.join('ffxml', 'amber', 'gaff', 'ffxml', f'{self._forcefield}.xml')) + + filename = resource_filename( + "openmmforcefields", + os.path.join("ffxml", "amber", "gaff", "ffxml", f"{self._forcefield}.xml"), + ) return filename def generator(self, forcefield, residue): @@ -545,7 +616,7 @@ def generator(self, forcefield, residue): """ # Load the GAFF parameters if we haven't done so already for this force field - if not forcefield in self._gaff_parameters_loaded: + if forcefield not in self._gaff_parameters_loaded: # Instruct the ForceField to load the GAFF parameters forcefield.loadFile(self.gaff_xml_filename) # Note that we've loaded the GAFF parameters @@ -593,7 +664,9 @@ def generate_residue_template(self, molecule, residue_atoms=None): # Use the canonical isomeric SMILES to uniquely name the template smiles = molecule.to_smiles() - _logger.info(f'Generating a residue template for {smiles} using {self._forcefield}') + _logger.info( + f"Generating a residue template for {smiles} using {self._forcefield}" + ) # Generate unique atom names self._generate_unique_atom_names(molecule) @@ -601,54 +674,66 @@ def generate_residue_template(self, molecule, residue_atoms=None): # Compute net formal charge net_charge = molecule.total_charge - _logger.debug(f'Total charge is {net_charge}') + _logger.debug(f"Total charge is {net_charge}") # Compute partial charges if required if self._molecule_has_user_charges(molecule): - _logger.debug(f'Using user-provided charges because partial charges are nonzero...') + _logger.debug( + "Using user-provided charges because partial charges are nonzero..." + ) else: - _logger.debug(f'Computing AM1-BCC charges...') + _logger.debug("Computing AM1-BCC charges...") # NOTE: generate_conformers seems to be required for some molecules # https://github.com/openforcefield/openff-toolkit/issues/492 molecule.generate_conformers(n_conformers=10) - molecule.assign_partial_charges(partial_charge_method='am1bcc') + molecule.assign_partial_charges(partial_charge_method="am1bcc") # Geneate a single conformation - _logger.debug(f'Generating a conformer...') + _logger.debug("Generating a conformer...") molecule.generate_conformers(n_conformers=1) # Create temporary directory for running antechamber import os import tempfile + tmpdir = tempfile.mkdtemp() - prefix = 'molecule' - input_sdf_filename = os.path.join(tmpdir, prefix + '.sdf') - gaff_mol2_filename = os.path.join(tmpdir, prefix + '.gaff.mol2') - frcmod_filename = os.path.join(tmpdir, prefix + '.frcmod') + prefix = "molecule" + input_sdf_filename = os.path.join(tmpdir, prefix + ".sdf") + gaff_mol2_filename = os.path.join(tmpdir, prefix + ".gaff.mol2") + frcmod_filename = os.path.join(tmpdir, prefix + ".frcmod") # Write MDL SDF file for input into antechamber - molecule.to_file(input_sdf_filename, file_format='sdf') + molecule.to_file(input_sdf_filename, file_format="sdf") # Parameterize the molecule with antechamber (without charging) - _logger.debug(f'Running antechamber...') - self._run_antechamber(molecule_filename=input_sdf_filename, input_format='mdl', - gaff_mol2_filename=gaff_mol2_filename, frcmod_filename=frcmod_filename) + _logger.debug("Running antechamber...") + self._run_antechamber( + molecule_filename=input_sdf_filename, + input_format="mdl", + gaff_mol2_filename=gaff_mol2_filename, + frcmod_filename=frcmod_filename, + ) # Read the resulting GAFF mol2 file atom types - _logger.debug(f'Reading GAFF atom types...') + _logger.debug("Reading GAFF atom types...") self._read_gaff_atom_types_from_mol2(gaff_mol2_filename, molecule) - # If residue_atoms = None, add all atoms to the residues - if residue_atoms == None: - residue_atoms = [ atom for atom in molecule.atoms ] + # If residue_atoms == None, add all atoms to the residues + if not residue_atoms: + residue_atoms = [atom for atom in molecule.atoms] # Modify partial charges so that charge on residue atoms is integral # TODO: This may require some modification to correctly handle API changes # when OpenFF toolkit makes charge quantities consistently unit-bearing # or pure numbers. - _logger.debug(f'Fixing partial charges...') - _logger.debug(f'{molecule.partial_charges}') - residue_charge = ensure_quantity(0.0 * unit.elementary_charge, unit_solution) + _logger.debug("Fixing partial charges...") + _logger.debug(f"{molecule.partial_charges}") + + # This variable is unsed! + residue_charge = ensure_quantity( # noqaa + 0.0 * unit.elementary_charge, + unit_solution, + ) total_charge = molecule.partial_charges.sum() sum_of_absolute_charge = np.sum(np.abs(molecule.partial_charges)) @@ -669,47 +754,55 @@ def generate_residue_template(self, molecule, residue_atoms=None): if redistribute: # Redistribute excess charge proportionally to absolute charge - molecule.partial_charges = molecule.partial_charges + charge_deficit * abs(molecule.partial_charges) / sum_of_absolute_charge - _logger.debug(f'{molecule.partial_charges}') + molecule.partial_charges = ( + molecule.partial_charges + + charge_deficit + * abs(molecule.partial_charges) + / sum_of_absolute_charge + ) + _logger.debug(f"{molecule.partial_charges}") # Generate additional parameters if needed # TODO: Do we have to make sure that we don't duplicate existing parameters already loaded in the forcefield? - _logger.debug(f'Creating ffxml contents for additional parameters...') - from inspect import ( - signature, # use introspection to support multiple parmed versions + _logger.debug("Creating ffxml contents for additional parameters...") + from inspect import ( # use introspection to support multiple parmed versions + signature, ) from io import StringIO - leaprc = StringIO('parm = loadamberparams %s' % frcmod_filename) + + leaprc = StringIO("parm = loadamberparams %s" % frcmod_filename) import parmed + params = parmed.amber.AmberParameterSet.from_leaprc(leaprc) kwargs = {} - if 'remediate_residues' in signature(parmed.openmm.OpenMMParameterSet.from_parameterset).parameters: - kwargs['remediate_residues'] = False + if ( + "remediate_residues" + in signature(parmed.openmm.OpenMMParameterSet.from_parameterset).parameters + ): + kwargs["remediate_residues"] = False params = parmed.openmm.OpenMMParameterSet.from_parameterset(params, **kwargs) ffxml = StringIO() kwargs = {} - if 'write_unused' in signature(params.write).parameters: - kwargs['write_unused'] = True + if "write_unused" in signature(params.write).parameters: + kwargs["write_unused"] = True params.write(ffxml, **kwargs) ffxml_contents = ffxml.getvalue() # Create the residue template - _logger.debug(f'Creating residue template...') + _logger.debug("Creating residue template...") from lxml import etree + root = etree.fromstring(ffxml_contents) # Create residue definitions residues = etree.SubElement(root, "Residues") residue = etree.SubElement(residues, "Residue", name=smiles) for atom in molecule.atoms: - if uses_old_api: - charge_string =str( + charge_string = str( atom.partial_charge.value_in_unit(openmm_unit.elementary_charge) ) else: - charge_string = str( - atom.partial_charge.m_as(unit.elementary_charge) - ) + charge_string = str(atom.partial_charge.m_as(unit.elementary_charge)) atom = etree.SubElement( residue, @@ -721,19 +814,34 @@ def generate_residue_template(self, molecule, residue_atoms=None): for bond in molecule.bonds: if (bond.atom1 in residue_atoms) and (bond.atom2 in residue_atoms): - bond = etree.SubElement(residue, "Bond", atomName1=bond.atom1.name, atomName2=bond.atom2.name) + bond = etree.SubElement( + residue, + "Bond", + atomName1=bond.atom1.name, + atomName2=bond.atom2.name, + ) elif (bond.atom1 in residue_atoms) and (bond.atom2 not in residue_atoms): - bond = etree.SubElement(residue, "ExternalBond", atomName=bond.atom1.name) + bond = etree.SubElement( + residue, "ExternalBond", atomName=bond.atom1.name + ) elif (bond.atom1 not in residue_atoms) and (bond.atom2 in residue_atoms): - bond = etree.SubElement(residue, "ExternalBond", atomName=bond.atom2.name) + bond = etree.SubElement( + residue, "ExternalBond", atomName=bond.atom2.name + ) # Render XML into string and append to parameters - ffxml_contents = etree.tostring(root, pretty_print=True, encoding='unicode') - _logger.debug(f'ffxml creation complete.') + ffxml_contents = etree.tostring(root, pretty_print=True, encoding="unicode") + _logger.debug("ffxml creation complete.") return ffxml_contents - def _run_antechamber(self, molecule_filename, input_format='sdf', - gaff_mol2_filename=None, frcmod_filename=None, verbosity=0): + def _run_antechamber( + self, + molecule_filename, + input_format="sdf", + gaff_mol2_filename=None, + frcmod_filename=None, + verbosity=0, + ): """Run AmberTools antechamber and parmchk2 to create GAFF mol2 and frcmod files. Parameters @@ -761,15 +869,16 @@ def _run_antechamber(self, molecule_filename, input_format='sdf', Amber frcmod file containing additional parameters for the molecule not found in corresponding gaff.dat """ if gaff_mol2_filename is None: - gaff_mol2_filename = 'molecule.gaff.mol2' + gaff_mol2_filename = "molecule.gaff.mol2" if frcmod_filename is None: - frcmod_filename = 'molecule.frcmod' + frcmod_filename = "molecule.frcmod" # Build absolute paths for input and output files import os - molecule_filename = os.path.abspath( molecule_filename ) - gaff_mol2_filename = os.path.abspath( gaff_mol2_filename ) - frcmod_filename = os.path.abspath( frcmod_filename ) + + molecule_filename = os.path.abspath(molecule_filename) + gaff_mol2_filename = os.path.abspath(gaff_mol2_filename) + frcmod_filename = os.path.abspath(frcmod_filename) def read_file_contents(filename): infile = open(filename) @@ -780,75 +889,80 @@ def read_file_contents(filename): # Use temporary directory context to do this to avoid issues with spaces in filenames, etc. import subprocess import tempfile + with tempfile.TemporaryDirectory() as tmpdir: cwd = os.getcwd() os.chdir(tmpdir) - local_input_filename = 'in.' + input_format + local_input_filename = "in." + input_format import shutil + shutil.copy(molecule_filename, local_input_filename) # Determine whether antechamber supports -dr [yes/no] option - cmd = f'antechamber -h | grep dr' + cmd = "antechamber -h | grep dr" supports_acdoctor = False - if ('acdoctor' in subprocess.getoutput(cmd)): + if "acdoctor" in subprocess.getoutput(cmd): supports_acdoctor = True - if (self._gaff_major_version == '1'): - atom_type = 'gaff' - elif (self._gaff_major_version == '2'): - atom_type = 'gaff2' + if self._gaff_major_version == "1": + atom_type = "gaff" + elif self._gaff_major_version == "2": + atom_type = "gaff2" else: - raise ValueError(f'gaff major version {self._gaff_major_version} unknown') + raise ValueError( + f"gaff major version {self._gaff_major_version} unknown" + ) # Run antechamber without charging (which is done separately) - cmd = f'antechamber -i {local_input_filename} -fi {input_format} -o out.mol2 -fo mol2 -s {verbosity} -at {atom_type}' + cmd = f"antechamber -i {local_input_filename} -fi {input_format} -o out.mol2 -fo mol2 -s {verbosity} -at {atom_type}" if supports_acdoctor: - cmd += ' -dr ' + ('yes' if verbosity else 'no') + cmd += " -dr " + ("yes" if verbosity else "no") _logger.debug(cmd) output = subprocess.getoutput(cmd) import os - if not os.path.exists('out.mol2'): - msg = "antechamber failed to produce output mol2 file\n" + + if not os.path.exists("out.mol2"): + msg = "antechamber failed to produce output mol2 file\n" msg += "command: %s\n" % cmd msg += "output:\n" - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" msg += output - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" msg += "input:\n" - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" msg += read_file_contents(local_input_filename) - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" # TODO: Run antechamber again with acdoctor mode on (-dr yes) to get more debug info, if supported os.chdir(cwd) raise Exception(msg) _logger.debug(output) # Run parmchk. - shutil.copy(self.gaff_dat_filename, 'gaff.dat') + shutil.copy(self.gaff_dat_filename, "gaff.dat") cmd = f"parmchk2 -i out.mol2 -f mol2 -p gaff.dat -o out.frcmod -s {self._gaff_major_version}" _logger.debug(cmd) output = subprocess.getoutput(cmd) - if not os.path.exists('out.frcmod'): - msg = "parmchk2 failed to produce output frcmod file\n" + if not os.path.exists("out.frcmod"): + msg = "parmchk2 failed to produce output frcmod file\n" msg += "command: %s\n" % cmd msg += "output:\n" - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" msg += output - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" msg += "input mol2:\n" - msg += 8 * "----------" + '\n' - msg += read_file_contents('out.mol2') - msg += 8 * "----------" + '\n' + msg += 8 * "----------" + "\n" + msg += read_file_contents("out.mol2") + msg += 8 * "----------" + "\n" os.chdir(cwd) raise Exception(msg) _logger.debug(output) self._check_for_errors(output) # Copy back - shutil.copy( 'out.mol2', gaff_mol2_filename ) - shutil.copy( 'out.frcmod', frcmod_filename ) + shutil.copy("out.mol2", gaff_mol2_filename) + shutil.copy("out.frcmod", frcmod_filename) os.chdir(cwd) @@ -874,7 +988,7 @@ def _read_gaff_atom_types_from_mol2(self, gaff_mol2_filename, molecule): line = infile.readline() # Seek to ATOM block while line: - if line.strip() == '@ATOM': + if line.strip() == "@ATOM": break line = infile.readline() # Read GAFF atom types @@ -902,17 +1016,17 @@ def _check_for_errors(self, outputtext, other_errors=None, ignore_errors=None): """ - lines = outputtext.split('\n') + lines = outputtext.split("\n") error_lines = [] for line in lines: - if 'ERROR' in line.upper(): - error_lines.append( line ) - if not other_errors == None: + if "ERROR" in line.upper(): + error_lines.append(line) + if other_errors is not None: for err in other_errors: if err.upper() in line.upper(): - error_lines.append( line ) + error_lines.append(line) - if not ignore_errors == None and len(error_lines)>0: + if ignore_errors is not None and len(error_lines) > 0: new_error_lines = [] for ign in ignore_errors: ignore = False @@ -920,27 +1034,30 @@ def _check_for_errors(self, outputtext, other_errors=None, ignore_errors=None): if ign in err: ignore = True if not ignore: - new_error_lines.append( err ) + new_error_lines.append(err) error_lines = new_error_lines if len(error_lines) > 0: - _logger.warning("Unexpected errors encountered running AMBER tool. Offending output:") + _logger.warning( + "Unexpected errors encountered running AMBER tool. Offending output:" + ) for line in error_lines: _logger.warning(line) - raise(RuntimeError("Error encountered running AMBER tool. Exiting.")) + raise (RuntimeError("Error encountered running AMBER tool. Exiting.")) return + ################################################################################ # MixIn for force field template generators that produce OpenMM System objects ################################################################################ + class OpenMMSystemMixin: - """ - """ + """ """ + def clear_system_cache(self): - """Initialize the OpenMM System cache - """ + """Initialize the OpenMM System cache""" self._system_cache = dict() def cache_system(self, smiles, system): @@ -975,7 +1092,9 @@ def get_openmm_system(self, molecule): else: return None - def convert_system_to_ffxml(self, molecule, system, improper_atom_ordering='smirnoff'): + def convert_system_to_ffxml( + self, molecule, system, improper_atom_ordering="smirnoff" + ): """Convert OpenMM System object to molecule-specific OpenMM ffxml Parameters @@ -997,6 +1116,7 @@ def convert_system_to_ffxml(self, molecule, system, improper_atom_ordering='smir # Generate OpenMM ffxml definition for this molecule from lxml import etree + root = etree.Element("ForceField") def as_attrib(quantity): @@ -1012,8 +1132,10 @@ def as_attrib(quantity): return str(quantity.m) else: from openmm.unit import Quantity as OpenMMQuantity + if isinstance(quantity, OpenMMQuantity): from openmm import unit + return str(quantity.value_in_unit_system(unit.md_unit_system)) else: raise ValueError(f"Found unexpected type {type(quantity)}.") @@ -1021,17 +1143,23 @@ def as_attrib(quantity): # Append unique type names to atoms smiles = molecule.to_smiles() for index, atom in enumerate(molecule.atoms): - setattr(atom, 'typename', f'{smiles}${atom.name}#{index}') + setattr(atom, "typename", f"{smiles}${atom.name}#{index}") # Generate atom types atom_types = etree.SubElement(root, "AtomTypes") for atom_index, atom in enumerate(molecule.atoms): # Create a new atom type for each atom in the molecule - paricle_indices = [atom_index] element_symbol = atom.element.symbol if uses_old_api else atom.symbol - atom_type = etree.SubElement(atom_types, "Type", name=atom.typename, - element=element_symbol, mass=as_attrib(atom.mass)) - atom_type.set('class', atom.typename) # 'class' is a reserved Python keyword, so use alternative API + atom_type = etree.SubElement( + atom_types, + "Type", + name=atom.typename, + element=element_symbol, + mass=as_attrib(atom.mass), + ) + atom_type.set( + "class", atom.typename + ) # 'class' is a reserved Python keyword, so use alternative API supported_forces = { "NonbondedForce", @@ -1046,9 +1174,13 @@ def as_attrib(quantity): force_name = force.__class__.__name__ if force_name in forces: - raise ForceException(f"Two instances of force {force_name} appear in System") + raise ForceException( + f"Two instances of force {force_name} appear in System" + ) if force_name not in supported_forces: - raise ForceException(f"Custom forces not supported. Found force of type {force_name}.") + raise ForceException( + f"Custom forces not supported. Found force of type {force_name}." + ) forces[force_name] = force @@ -1065,99 +1197,168 @@ def classes(atom_indices): classmap : dict of str : str Dict of format { 'class1' : typename1, ... } """ - return { f'class{class_index+1}' : molecule.atoms[atom_index].typename for class_index,atom_index in enumerate(atom_indices) } + return { + f"class{class_index+1}": molecule.atoms[atom_index].typename + for class_index, atom_index in enumerate(atom_indices) + } # Lennard-Jones # TODO: Get coulomb14scale and lj14scale from SMIRNOFF ForceField object, # though this must match the original AMBER values - nonbonded_types = etree.SubElement(root, "NonbondedForce", coulomb14scale="0.833333", lj14scale="0.5") + nonbonded_types = etree.SubElement( + root, "NonbondedForce", coulomb14scale="0.833333", lj14scale="0.5" + ) etree.SubElement(nonbonded_types, "UseAttributeFromResidue", name="charge") - for atom_index in range(forces['NonbondedForce'].getNumParticles()): - charge, sigma, epsilon = forces['NonbondedForce'].getParticleParameters(atom_index) - nonbonded_type = etree.SubElement(nonbonded_types, "Atom", - sigma=as_attrib(sigma), epsilon=as_attrib(epsilon)) - nonbonded_type.set('class', molecule.atoms[atom_index].typename) # 'class' is a reserved Python keyword, so use alternative API + for atom_index in range(forces["NonbondedForce"].getNumParticles()): + charge, sigma, epsilon = forces["NonbondedForce"].getParticleParameters( + atom_index + ) + nonbonded_type = etree.SubElement( + nonbonded_types, + "Atom", + sigma=as_attrib(sigma), + epsilon=as_attrib(epsilon), + ) + nonbonded_type.set( + "class", molecule.atoms[atom_index].typename + ) # 'class' is a reserved Python keyword, so use alternative API # Bonds bond_types = etree.SubElement(root, "HarmonicBondForce") - atom_indices = [-1]*2 - for bond_index in range(forces['HarmonicBondForce'].getNumBonds()): - atom_indices[0], atom_indices[1], length, k = forces['HarmonicBondForce'].getBondParameters(bond_index) - bond_type = etree.SubElement(bond_types, "Bond", **classes(atom_indices), - length=as_attrib(length), k=as_attrib(k)) + atom_indices = [-1] * 2 + for bond_index in range(forces["HarmonicBondForce"].getNumBonds()): + atom_indices[0], atom_indices[1], length, k = forces[ + "HarmonicBondForce" + ].getBondParameters(bond_index) + + # This variable is not used - is it needed? + bond_type = etree.SubElement( # noqa + bond_types, + "Bond", + **classes(atom_indices), + length=as_attrib(length), + k=as_attrib(k), + ) # Angles angle_types = etree.SubElement(root, "HarmonicAngleForce") - atom_indices = [-1]*3 - for angle_index in range(forces['HarmonicAngleForce'].getNumAngles()): - atom_indices[0], atom_indices[1], atom_indices[2], angle, k = forces['HarmonicAngleForce'].getAngleParameters(angle_index) - angle_type = etree.SubElement(angle_types, "Angle", **classes(atom_indices), - angle=as_attrib(angle), k=as_attrib(k)) + atom_indices = [-1] * 3 + for angle_index in range(forces["HarmonicAngleForce"].getNumAngles()): + atom_indices[0], atom_indices[1], atom_indices[2], angle, k = forces[ + "HarmonicAngleForce" + ].getAngleParameters(angle_index) + + # This variable is not used - is it needed? + angle_type = etree.SubElement( # noqa + angle_types, + "Angle", + **classes(atom_indices), + angle=as_attrib(angle), + k=as_attrib(k), + ) # Torsions def torsion_tag(atom_indices): """Return 'Proper' or 'Improper' depending on torsion type""" - atoms = [ molecule.atoms[atom_index] for atom_index in atom_indices ] + atoms = [molecule.atoms[atom_index] for atom_index in atom_indices] # TODO: Check to make sure all atoms are in fact atoms and not virtual sites - if atoms[0].is_bonded_to(atoms[1]) and atoms[1].is_bonded_to(atoms[2]) and atoms[2].is_bonded_to(atoms[3]): + if ( + atoms[0].is_bonded_to(atoms[1]) + and atoms[1].is_bonded_to(atoms[2]) + and atoms[2].is_bonded_to(atoms[3]) + ): return "Proper" else: return "Improper" # Collect torsions torsions = dict() - for torsion_index in range(forces['PeriodicTorsionForce'].getNumTorsions()): - atom_indices = [-1]*4 - atom_indices[0], atom_indices[1], atom_indices[2], atom_indices[3], periodicity, phase, k = forces['PeriodicTorsionForce'].getTorsionParameters(torsion_index) + for torsion_index in range(forces["PeriodicTorsionForce"].getNumTorsions()): + atom_indices = [-1] * 4 + ( + atom_indices[0], + atom_indices[1], + atom_indices[2], + atom_indices[3], + periodicity, + phase, + k, + ) = forces["PeriodicTorsionForce"].getTorsionParameters(torsion_index) atom_indices = tuple(atom_indices) if atom_indices in torsions.keys(): - torsions[atom_indices].append( (periodicity, phase, k) ) + torsions[atom_indices].append((periodicity, phase, k)) else: - torsions[atom_indices] = [ (periodicity, phase, k) ] + torsions[atom_indices] = [(periodicity, phase, k)] # Create torsion definitions - torsion_types = etree.SubElement(root, "PeriodicTorsionForce", ordering='smirnoff') + torsion_types = etree.SubElement( + root, "PeriodicTorsionForce", ordering="smirnoff" + ) for atom_indices in torsions.keys(): - params = dict() # build parameter dictionary + params = dict() # build parameter dictionary nterms = len(torsions[atom_indices]) for term in range(nterms): periodicity, phase, k = torsions[atom_indices][term] - params[f'periodicity{term+1}'] = as_attrib(periodicity) - params[f'phase{term+1}'] = as_attrib(phase) - params[f'k{term+1}'] = as_attrib(k) - torsion_type = etree.SubElement(torsion_types, torsion_tag(atom_indices), **classes(atom_indices), **params) + params[f"periodicity{term+1}"] = as_attrib(periodicity) + params[f"phase{term+1}"] = as_attrib(phase) + params[f"k{term+1}"] = as_attrib(k) + + # This variable is not used - is it needed? + torsion_type = etree.SubElement( # noqa + torsion_types, + torsion_tag(atom_indices), + **classes(atom_indices), + **params, + ) # TODO: Handle virtual sites - virtual_sites = [ atom_index for atom_index in range(system.getNumParticles()) if system.isVirtualSite(atom_index) ] + virtual_sites = [ + atom_index + for atom_index in range(system.getNumParticles()) + if system.isVirtualSite(atom_index) + ] if len(virtual_sites) > 0: - raise Exception('Virtual sites are not yet supported') + raise Exception("Virtual sites are not yet supported") # Create residue definitions # TODO: Handle non-Atom atoms too (virtual sites) residues = etree.SubElement(root, "Residues") residue = etree.SubElement(residues, "Residue", name=smiles) for atom_index, atom in enumerate(molecule.atoms): - charge, sigma, epsilon = forces['NonbondedForce'].getParticleParameters(atom_index) - atom = etree.SubElement(residue, "Atom", name=atom.name, type=atom.typename, charge=as_attrib(charge)) + charge, sigma, epsilon = forces["NonbondedForce"].getParticleParameters( + atom_index + ) + atom = etree.SubElement( + residue, + "Atom", + name=atom.name, + type=atom.typename, + charge=as_attrib(charge), + ) for bond in molecule.bonds: - bond = etree.SubElement(residue, "Bond", atomName1=bond.atom1.name, atomName2=bond.atom2.name) + bond = etree.SubElement( + residue, "Bond", atomName1=bond.atom1.name, atomName2=bond.atom2.name + ) # Render XML into string - ffxml_contents = etree.tostring(root, pretty_print=True, encoding='unicode') + ffxml_contents = etree.tostring(root, pretty_print=True, encoding="unicode") - #_logger.debug(f'{ffxml_contents}') # DEBUG + # _logger.debug(f'{ffxml_contents}') # DEBUG return ffxml_contents + ################################################################################ # Open Force Field Initiative SMIRNOFF specific OpenMM ForceField template generation utilities ################################################################################ + class ClassProperty(property): def __get__(self, cls, owner): return self.fget.__get__(None, owner)() -class SMIRNOFFTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin): + +class SMIRNOFFTemplateGenerator(SmallMoleculeTemplateGenerator, OpenMMSystemMixin): """ OpenMM ForceField residue template generator for Open Force Field Initiative SMIRNOFF force fields using pre-cached OpenFF toolkit molecules. @@ -1207,6 +1408,7 @@ class SMIRNOFFTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin Newly parameterized molecules will be written to the cache, saving time next time! """ + def __init__(self, molecules=None, cache=None, forcefield=None, **kwargs): """ Create a SMIRNOFFTemplateGenerator with some OpenFF toolkit molecules @@ -1275,7 +1477,7 @@ def __init__(self, molecules=None, cache=None, forcefield=None, **kwargs): if forcefield is None: # Use latest supported Open Force Field Initiative release if none is specified - forcefield = 'openff-2.1.0' + forcefield = "openff-2.1.0" # TODO: After toolkit provides date-ranked force fields, # use latest dated version if we can sort by date, such as self.INSTALLED_FORCEFIELDS[-1] self._forcefield = forcefield @@ -1284,27 +1486,38 @@ def __init__(self, molecules=None, cache=None, forcefield=None, **kwargs): # TODO: Can we instead use the force field hash, or some other unique identifier? # TODO: Use file hash instead of name? import os + self._database_table_name = os.path.basename(forcefield) # Create ForceField object import openff.toolkit.typing.engines.smirnoff # check for an installed force field - available_force_fields = openff.toolkit.typing.engines.smirnoff.get_available_force_fields() - if (filename := forcefield + ".offxml") in available_force_fields or (filename := forcefield) in available_force_fields: - self._smirnoff_forcefield = openff.toolkit.typing.engines.smirnoff.ForceField(filename) + available_force_fields = ( + openff.toolkit.typing.engines.smirnoff.get_available_force_fields() + ) + if (filename := forcefield + ".offxml") in available_force_fields or ( + filename := forcefield + ) in available_force_fields: + self._smirnoff_forcefield = ( + openff.toolkit.typing.engines.smirnoff.ForceField(filename) + ) # just try parsing the input and let openff handle the error else: try: - self._smirnoff_forcefield = openff.toolkit.typing.engines.smirnoff.ForceField(forcefield) + self._smirnoff_forcefield = ( + openff.toolkit.typing.engines.smirnoff.ForceField(forcefield) + ) except Exception as e: _logger.error(e) - raise ValueError(f"Can't find specified SMIRNOFF force field ({forcefield}) in install paths or parse the input as a string.") from e + raise ValueError( + f"Can't find specified SMIRNOFF force field ({forcefield}) in install paths or parse the input as a string." + ) from e # Delete constraints, if present - if 'Constraints' in self._smirnoff_forcefield._parameter_handlers: - del self._smirnoff_forcefield._parameter_handlers['Constraints'] + if "Constraints" in self._smirnoff_forcefield._parameter_handlers: + del self._smirnoff_forcefield._parameter_handlers["Constraints"] # Find SMIRNOFF filename smirnoff_filename = self._search_paths(filename) @@ -1330,17 +1543,18 @@ def INSTALLED_FORCEFIELDS(cls): """ from openff.toolkit.typing.engines.smirnoff import get_available_force_fields + file_names = list() for filename in get_available_force_fields(full_paths=False): root, ext = os.path.splitext(filename) # Only add variants without '_unconstrained' - if '_unconstrained' in root: + if "_unconstrained" in root: continue # The OpenFF Toolkit ships two versions of its ff14SB port, one with SMIRNOFF-style # impropers and one with Amber-style impropers. The latter requires a special handler # (`AmberImproperTorsionHandler`) that is not shipped with the toolkit. See # https://github.com/openforcefield/amber-ff-porting/tree/0.0.3 - if root.startswith("ff14sb") and 'off_impropers' not in root: + if root.startswith("ff14sb") and "off_impropers" not in root: continue file_names.append(root) @@ -1374,7 +1588,7 @@ def _search_paths(self, filename): # Check whether this could be a file path if isinstance(filename, str): # Try first the simple path. - searched_dirs_paths = [''] + searched_dirs_paths = [""] # Then try a relative file path w.r.t. an installed directory. searched_dirs_paths.extend(_get_installed_offxml_dir_paths()) @@ -1421,7 +1635,9 @@ def generate_residue_template(self, molecule, residue_atoms=None): """ # Use the canonical isomeric SMILES to uniquely name the template smiles = molecule.to_smiles() - _logger.info(f'Generating a residue template for {smiles} using {self._forcefield}') + _logger.info( + f"Generating a residue template for {smiles} using {self._forcefield}" + ) # Generate unique atom names self._generate_unique_atom_names(molecule) @@ -1431,26 +1647,32 @@ def generate_residue_template(self, molecule, residue_atoms=None): if self._molecule_has_user_charges(molecule): charge_from_molecules = [molecule] - _logger.debug(f'Using user-provided charges because partial charges are nonzero...') + _logger.debug( + "Using user-provided charges because partial charges are nonzero..." + ) # Parameterize molecule - _logger.debug(f'Generating parameters...') - system = self._smirnoff_forcefield.create_openmm_system(molecule.to_topology(), charge_from_molecules=charge_from_molecules) + _logger.debug("Generating parameters...") + system = self._smirnoff_forcefield.create_openmm_system( + molecule.to_topology(), charge_from_molecules=charge_from_molecules + ) self.cache_system(smiles, system) # Convert to ffxml ffxml_contents = self.convert_system_to_ffxml(molecule, system) return ffxml_contents + ################################################################################ # Espaloma template generation utilities ################################################################################ -class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin): + +class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator, OpenMMSystemMixin): """ OpenMM ForceField residue template generator for espaloma force fields using pre-cached OpenFF toolkit molecules. - Espaloma uses a graph net approach to chemical perception to assign parameters and charges. + Espaloma uses a graph net approach to chemical perception to assign parameters and charges. * Espaloma docs and papers: https://docs.espaloma.org/ * Espaloma code and models: https://github.com/choderalab/espaloma @@ -1500,9 +1722,18 @@ class EspalomaTemplateGenerator(SmallMoleculeTemplateGenerator,OpenMMSystemMixin Newly parameterized molecules will be written to the cache, saving time next time! """ - CHARGE_METHODS = ('nn', 'am1-bcc', 'gasteiger', 'from-molecule') - def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path=None, template_generator_kwargs=None, **kwargs): + CHARGE_METHODS = ("nn", "am1-bcc", "gasteiger", "from-molecule") + + def __init__( + self, + molecules=None, + cache=None, + forcefield=None, + model_cache_path=None, + template_generator_kwargs=None, + **kwargs, + ): """ Create an EspalomaTemplateGenerator with some OpenFF toolkit molecules @@ -1527,12 +1758,12 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path If specified, use this directory to cache espaloma models default: ~/.espaloma/ template_generator_kwargs : dict, optional, default=None - Optional keyword arguments: + Optional keyword arguments: {"reference_forcefield": str, Openff force field supported by https://github.com/openforcefield/openff-forcefields without .offxml extension} - {"charge_method": str, Charge method supported by espaloma ['nn', 'am1-bcc', 'gasteiger', 'from-molecule']} - + {"charge_method": str, Charge method supported by espaloma ['nn', 'am1-bcc', 'gasteiger', 'from-molecule']} + Default behavior is to use ``openff_unconstrained-2.0.0`` for ``reference_forcefield`` and `nn` for `charge_method`. - User defined charges can be assigned by setting the ``charge_method`` to ``from_molecule`` + User defined charges can be assigned by setting the ``charge_method`` to ``from_molecule`` if charges are assigned to openff.toolkit.Molecule. Examples @@ -1584,13 +1815,14 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path # Espaloma model cache path if model_cache_path is None: import os + self.ESPALOMA_MODEL_CACHE_PATH = f'{os.getenv("HOME")}/.espaloma' else: self.ESPALOMA_MODEL_CACHE_PATH = model_cache_path if forcefield is None: # Use latest supported Espaloma force field release if none is specified - forcefield = 'espaloma-0.3.2' + forcefield = "espaloma-0.3.2" # TODO: After toolkit provides date-ranked force fields, # use latest dated version if we can sort by date, such as self.INSTALLED_FORCEFIELDS[-1] self._forcefield = forcefield @@ -1600,19 +1832,22 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path # Check reference forcefield and charge method if template_generator_kwargs is not None: - self._reference_forcefield = template_generator_kwargs.get('reference_forcefield', 'openff_unconstrained-2.0.0') - self._charge_method = template_generator_kwargs.get('charge_method', 'nn') + self._reference_forcefield = template_generator_kwargs.get( + "reference_forcefield", "openff_unconstrained-2.0.0" + ) + self._charge_method = template_generator_kwargs.get("charge_method", "nn") else: # Consider upgrading to 2.1.0, the recommended small moleucle force field for general use - self._reference_forcefield = 'openff_unconstrained-2.0.0' - self._charge_method = 'from-molecule' + self._reference_forcefield = "openff_unconstrained-2.0.0" + self._charge_method = "from-molecule" # Check to make sure dependencies are installed try: - import espaloma - except ImportError as e: - msg = 'The EspalomaResidueTemplateGenerator requires espaloma to be installed' - raise ValueError(msg) + import espaloma # noqa + except ImportError: + raise ValueError( + "The EspalomaResidueTemplateGenerator requires espaloma to be installed" + ) # Check force field can be found @@ -1620,11 +1855,15 @@ def __init__(self, molecules=None, cache=None, forcefield=None, model_cache_path # TODO: Can we instead use the force field hash, or some other unique identifier? # TODO: Use file hash instead of name? import os + self._database_table_name = os.path.basename(forcefield) # Load torch model import torch - self.espaloma_model = torch.load(self.espaloma_model_filepath, map_location=torch.device('cpu')) + + self.espaloma_model = torch.load( + self.espaloma_model_filepath, map_location=torch.device("cpu") + ) self.espaloma_model.eval() # Cache a copy of the OpenMM System generated for each molecule for testing purposes @@ -1638,7 +1877,7 @@ def INSTALLED_FORCEFIELDS(self): # TODO: Update this # TODO: Can we list force fields installed locally? # TODO: Maybe we can check ~/.espaloma and ESPALOMA_PATH? - return ['espaloma-0.3.2'] + return ["espaloma-0.3.2"] def _get_model_filepath(self, forcefield): """Retrieve local file path to cached espaloma model parameters, or retrieve remote model if needed. @@ -1654,37 +1893,42 @@ def _get_model_filepath(self, forcefield): Path to local cache of espaloma .pt model parameters """ import os + if os.path.exists(forcefield): # A specific file path has been specified - _logger.info(f'Using espaloma model found at {forcefield}') + _logger.info(f"Using espaloma model found at {forcefield}") return forcefield # TODO: This isn't quite right---we should be checking this in the previous branch? elif os.path.exists(os.path.join(self.ESPALOMA_MODEL_CACHE_PATH, forcefield)): # A specific file path has been specified filepath = os.path.join(self.ESPALOMA_MODEL_CACHE_PATH, forcefield) - _logger.info(f'Using espaloma model found at {filepath}') + _logger.info(f"Using espaloma model found at {filepath}") return filepath else: import validators + if validators.url(forcefield): # URL has been provided url = forcefield - filename = os.path.basename(url) # local filename for caching + filename = os.path.basename(url) # local filename for caching else: # Identify version number import re - m = re.match(r'espaloma-(\d+\.\d+\.\d+)', forcefield) + + m = re.match(r"espaloma-(\d+\.\d+\.\d+)", forcefield) if m is None: - raise ValueError(f'Espaloma model must be filepath or formatted like "espaloma-0.3.2" (found: "{forcefield}")') + raise ValueError( + f'Espaloma model must be filepath or formatted like "espaloma-0.3.2" (found: "{forcefield}")' + ) version = m.group(1) # Construct URL - url = f'https://github.com/choderalab/espaloma/releases/download/{version}/espaloma-{version}.pt' - filename = f'espaloma-{version}.pt' # local filename for caching + url = f"https://github.com/choderalab/espaloma/releases/download/{version}/espaloma-{version}.pt" + filename = f"espaloma-{version}.pt" # local filename for caching # Check cache cached_filename = os.path.join(self.ESPALOMA_MODEL_CACHE_PATH, filename) if os.path.exists(cached_filename): - _logger.info(f'Using espaloma model cached at {cached_filename}') + _logger.info(f"Using espaloma model cached at {cached_filename}") return cached_filename else: # Create the cache directory @@ -1692,16 +1936,19 @@ def _get_model_filepath(self, forcefield): os.makedirs(self.ESPALOMA_MODEL_CACHE_PATH) # Attempt to retrieve from URL - _logger.info(f'Attempting to retrieve espaloma model from {url}') + _logger.info(f"Attempting to retrieve espaloma model from {url}") import urllib import urllib.error import urllib.request + try: urllib.request.urlretrieve(url, filename=cached_filename) - except urllib.error.URLError as e: - raise ValueError(f'No espaloma model found at expected URL: {url}') + except urllib.error.URLError: + raise ValueError(f"No espaloma model found at expected URL: {url}") except urllib.error.HTTPError as e: - raise ValueError(f'An error occurred while retrieving espaloma model from {url} : {e}') + raise ValueError( + f"An error occurred while retrieving espaloma model from {url} : {e}" + ) return cached_filename @property @@ -1735,22 +1982,27 @@ def generate_residue_template(self, molecule, residue_atoms=None): """ from openmm import unit + # Use the canonical isomeric SMILES to uniquely name the template smiles = molecule.to_smiles() - _logger.info(f'Generating a residue template for {smiles} using {self._forcefield}') + _logger.info( + f"Generating a residue template for {smiles} using {self._forcefield}" + ) # Generate unique atom names self._generate_unique_atom_names(molecule) # Parameterize molecule - _logger.debug(f'Generating espaloma parameters...') + _logger.debug("Generating espaloma parameters...") # create an Espaloma Graph object to represent the molecule of interest import espaloma as esp + molecule_graph = esp.Graph(molecule) # Regenerate SMIRNOFF impropers from espaloma.graphs.utils.regenerate_impropers import regenerate_impropers + regenerate_impropers(molecule_graph) # Book keep partial charges if molecule has user charges @@ -1759,7 +2011,9 @@ def generate_residue_template(self, molecule, residue_atoms=None): # TODO: Change this to use openff.units exclusively? # Make sure charges are in the right openmm units _partial_charges = molecule.partial_charges - if all([isinstance(_charge, unit.Quantity) for _charge in _partial_charges]): + if all( + [isinstance(_charge, unit.Quantity) for _charge in _partial_charges] + ): _charges = _partial_charges.value_in_unit(esp.units.CHARGE_UNIT) else: # Assuming charges are in openff units @@ -1770,24 +2024,37 @@ def generate_residue_template(self, molecule, residue_atoms=None): # Create an OpenMM System # Update partial charges if charge_method is "from_molecule" - if self._charge_method == 'from-molecule': + if self._charge_method == "from-molecule": if self._molecule_has_user_charges(molecule): - import torch import numpy as np + import torch + # Handle ValueError: - # "ValueError: given numpy array has byte order different from the native byte order. + # "ValueError: given numpy array has byte order different from the native byte order. # Conversion between byte orders is currently not supported." _charges = _charges.astype(np.float32) - molecule_graph.nodes['n1'].data['q'] = torch.from_numpy(_charges).unsqueeze(-1).float() + molecule_graph.nodes["n1"].data["q"] = ( + torch.from_numpy(_charges).unsqueeze(-1).float() + ) else: # No charges were found in molecule -- defaulting to nn charge method - warnings.warn("No charges found in molecule. Defaulting to 'nn' charge method.") + warnings.warn( + "No charges found in molecule. Defaulting to 'nn' charge method." + ) self._charge_method = "nn" - system = esp.graphs.deploy.openmm_system_from_graph(molecule_graph, charge_method=self._charge_method, forcefield=self._reference_forcefield) - _logger.info(f'Generating a system with charge method {self._charge_method} and {self._reference_forcefield} to assign nonbonded parameters') + system = esp.graphs.deploy.openmm_system_from_graph( + molecule_graph, + charge_method=self._charge_method, + forcefield=self._reference_forcefield, + ) + _logger.info( + f"Generating a system with charge method {self._charge_method} and {self._reference_forcefield} to assign nonbonded parameters" + ) self.cache_system(smiles, system) # Convert to ffxml - ffxml_contents = self.convert_system_to_ffxml(molecule, system, improper_atom_ordering='smirnoff') + ffxml_contents = self.convert_system_to_ffxml( + molecule, system, improper_atom_ordering="smirnoff" + ) return ffxml_contents diff --git a/openmmforcefields/tests/conftest.py b/openmmforcefields/tests/conftest.py index cbbb0949..bdcd1c6b 100644 --- a/openmmforcefields/tests/conftest.py +++ b/openmmforcefields/tests/conftest.py @@ -2,6 +2,7 @@ import pytest + def pytest_addoption(parser): parser.addoption( "--runespaloma", action="store_true", default=False, help="run espaloma tests" @@ -18,4 +19,4 @@ def pytest_collection_modifyitems(config, items): if not config.getoption("--runespaloma"): for item in items: if "espaloma" in item.keywords: - item.add_marker(skip_slow) \ No newline at end of file + item.add_marker(skip_slow) diff --git a/openmmforcefields/tests/test_amber_import.py b/openmmforcefields/tests/test_amber_import.py index 38024c38..4b4da1ef 100644 --- a/openmmforcefields/tests/test_amber_import.py +++ b/openmmforcefields/tests/test_amber_import.py @@ -10,10 +10,15 @@ from openmmforcefields.utils import get_ffxml_path amber_ffxml_filenames: List[str] = [ - 'amber/' + file.name for file in pathlib.Path(get_ffxml_path()).glob("amber/*xml") + "amber/" + file.name for file in pathlib.Path(get_ffxml_path()).glob("amber/*xml") ] -@pytest.mark.parametrize("filename", amber_ffxml_filenames, ids=lambda filename : f'Importing ffxml file {filename}') + +@pytest.mark.parametrize( + "filename", + amber_ffxml_filenames, + ids=lambda filename: f"Importing ffxml file {filename}", +) def test_ffxml_import(filename): """ Attempt to load OpenMM ffxml forcefield file. @@ -27,20 +32,21 @@ def test_ffxml_import(filename): from openmm import app # Handle special cases - if filename == 'amber/phosaa10.xml': + if filename == "amber/phosaa10.xml": # Must be used with ff99SB.xml - filenames = ['amber/ff99SB.xml', 'amber/phosaa10.xml'] - ff = app.ForceField(*filenames) - elif filename == 'amber/phosaa14SB.xml': + filenames = ["amber/ff99SB.xml", "amber/phosaa10.xml"] + app.ForceField(*filenames) + elif filename == "amber/phosaa14SB.xml": # Must be used with ff14SB.xml - filenames = ['amber/ff14SB.xml', 'amber/phosaa14SB.xml'] - ff = app.ForceField(*filenames) - elif filename == 'amber/GLYCAM_06j-1.xml': + filenames = ["amber/ff14SB.xml", "amber/phosaa14SB.xml"] + app.ForceField(*filenames) + elif filename == "amber/GLYCAM_06j-1.xml": # Must be used with protein.ff14SB.xml - filenames = ['amber/protein.ff14SB.xml', 'amber/GLYCAM_06j-1.xml'] - ff = app.ForceField(*filenames) + filenames = ["amber/protein.ff14SB.xml", "amber/GLYCAM_06j-1.xml"] + app.ForceField(*filenames) else: - ff = app.ForceField(filename) + app.ForceField(filename) + def check_ffxml_parameterize(pdb_filename, ffxml_filename): """ @@ -55,8 +61,10 @@ def check_ffxml_parameterize(pdb_filename, ffxml_filename): """ from openmm import app - pdbfile = app.PDBFile(pdb_filename) - ff = app.ForceField(ffxml_filename) + + app.PDBFile(pdb_filename) + app.ForceField(ffxml_filename) + def test_amber_import_ff94(): """ @@ -64,7 +72,9 @@ def test_amber_import_ff94(): """ from openmm import app - ff = app.ForceField('amber/ff94.xml') + + app.ForceField("amber/ff94.xml") + def test_amber_parameterize_ff94(): """ @@ -72,5 +82,6 @@ def test_amber_parameterize_ff94(): """ from pkg_resources import resource_filename - pdb_filename = resource_filename('openmm.app', 'data/test.pdb') - check_ffxml_parameterize(pdb_filename, 'amber/ff94.xml') + + pdb_filename = resource_filename("openmm.app", "data/test.pdb") + check_ffxml_parameterize(pdb_filename, "amber/ff94.xml") diff --git a/openmmforcefields/tests/test_system_generator.py b/openmmforcefields/tests/test_system_generator.py index 537db94e..68c4e27b 100644 --- a/openmmforcefields/tests/test_system_generator.py +++ b/openmmforcefields/tests/test_system_generator.py @@ -1,7 +1,6 @@ import copy import os import tempfile -import unittest import numpy as np import openmm @@ -12,20 +11,19 @@ from openmmforcefields.generators import SystemGenerator from openmmforcefields.utils import Timer, get_data_filename -CI = ('CI' in os.environ) +CI = "CI" in os.environ ################################################################################ # Tests ################################################################################ -@pytest.fixture(scope="class", autouse=True) - +@pytest.fixture(scope="class", autouse=True) def test_systems(): testsystems = dict() - for (system_name, prefix) in [ + for system_name, prefix in [ # TODO: Uncomment these after we fix input files - ('bace', 'Bace'), + ("bace", "Bace"), # ('cdk1', 'CDK2'), # ('jnk1', 'Jnk1'), # ('mcl1', 'MCL1'), @@ -35,21 +33,26 @@ def test_systems(): # ('tyk2', 'Tyk2'), ]: # Load protein - pdb_filename = get_data_filename(os.path.join('perses_jacs_systems', system_name, prefix + '_protein.pdb')) + pdb_filename = get_data_filename( + os.path.join("perses_jacs_systems", system_name, prefix + "_protein.pdb") + ) pdbfile = PDBFile(pdb_filename) # Load molecules sdf_filename = get_data_filename( - os.path.join('perses_jacs_systems', system_name, prefix + '_ligands_shifted.sdf')) + os.path.join( + "perses_jacs_systems", system_name, prefix + "_ligands_shifted.sdf" + ) + ) molecules = Molecule.from_file(sdf_filename, allow_undefined_stereo=True) - print(f'Read {len(molecules)} molecules from {sdf_filename}') + print(f"Read {len(molecules)} molecules from {sdf_filename}") n_molecules = len(molecules) # Limit number of molecules for testing MAX_MOLECULES = 10 if not CI else 2 - if (n_molecules > MAX_MOLECULES): - print(f'Limiting to {MAX_MOLECULES} for testing...') + if n_molecules > MAX_MOLECULES: + print(f"Limiting to {MAX_MOLECULES} for testing...") n_molecules = MAX_MOLECULES molecules = [molecules[index] for index in range(n_molecules)] @@ -59,33 +62,46 @@ def test_systems(): # NOTE: This does not work because parmed does not correctly assign bonds for HID # protein_structure = parmed.load_file(pdb_filename) # NOTE: This is the workaround - protein_structure = parmed.openmm.load_topology(pdbfile.topology, xyz=pdbfile.positions) + protein_structure = parmed.openmm.load_topology( + pdbfile.topology, xyz=pdbfile.positions + ) molecules_structure = parmed.load_file(sdf_filename) - molecules_structure = [molecules_structure[index] for index in range(n_molecules)] - - complex_structures = [(molecules_structure[index] + protein_structure) for index in range(n_molecules)] - complex_structures = [molecules_structure[index] for index in range(n_molecules)] # DEBUG + molecules_structure = [ + molecules_structure[index] for index in range(n_molecules) + ] + + complex_structures = [ + (molecules_structure[index] + protein_structure) + for index in range(n_molecules) + ] + complex_structures = [ + molecules_structure[index] for index in range(n_molecules) + ] # DEBUG # Store testsystem = { - 'name': system_name, - 'protein_pdbfile': pdbfile, - 'molecules': molecules, - 'complex_structures': complex_structures + "name": system_name, + "protein_pdbfile": pdbfile, + "molecules": molecules, + "complex_structures": complex_structures, } testsystems[system_name] = testsystem # DEBUG for name, testsystem in testsystems.items(): - filename = f'testsystem-{name}.pdb' + filename = f"testsystem-{name}.pdb" print(filename) - structure = testsystem['complex_structures'][0] + structure = testsystem["complex_structures"][0] # structure.save(filename, overwrite=True) - with open(filename, 'w') as outfile: + with open(filename, "w") as outfile: PDBFile.writeFile(structure.topology, structure.positions, outfile) - testsystem['molecules'][0].to_file(f'testsystem-{name}-molecule.sdf', file_format="SDF") - testsystem['molecules'][0].to_file(f'testsystem-{name}-molecule.pdb', file_format="PDB") + testsystem["molecules"][0].to_file( + f"testsystem-{name}-molecule.sdf", file_format="SDF" + ) + testsystem["molecules"][0].to_file( + f"testsystem-{name}-molecule.pdb", file_format="PDB" + ) # TODO: Create other test topologies # TODO: Protein-only @@ -95,7 +111,8 @@ def test_systems(): # Suppress DEBUG logging from various packages import logging - for name in ['parmed', 'matplotlib']: + + for name in ["parmed", "matplotlib"]: logging.getLogger(name).setLevel(logging.WARNING) return testsystems @@ -103,7 +120,11 @@ def test_systems(): class TestSystemGenerator(object): # AMBER force field combination to test - amber_forcefields = ['amber/protein.ff14SB.xml', 'amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'] + amber_forcefields = [ + "amber/protein.ff14SB.xml", + "amber/tip3p_standard.xml", + "amber/tip3p_HFE_multivalent.xml", + ] def filter_molecules(self, molecules): """ @@ -123,7 +144,7 @@ def filter_molecules(self, molecules): # TODO: Eliminate molecules without fully-specified stereochemistry # Select some small molecules for fast testing MAX_ATOMS = 45 - molecules = [ molecule for molecule in molecules if molecule.n_atoms < MAX_ATOMS ] + molecules = [molecule for molecule in molecules if molecule.n_atoms < MAX_ATOMS] # Cut down number of tests for travis MAX_MOLECULES = 10 if not CI else 2 molecules = molecules[:MAX_MOLECULES] @@ -133,7 +154,7 @@ def filter_molecules(self, molecules): def test_create(self): """Test SystemGenerator creation with only OpenMM ffxml force fields""" # Create an empty system generator - generator = SystemGenerator() + SystemGenerator() def test_barostat(self): """Test that barostat addition works correctly""" @@ -142,6 +163,7 @@ def test_barostat(self): # Create a template barostat from openmm import MonteCarloBarostat, unit + pressure = 0.95 * unit.atmospheres temperature = 301.0 * unit.kelvin frequency = 23 @@ -149,16 +171,24 @@ def test_barostat(self): # Load a PDB file - pdb_filename = get_data_filename(os.path.join('perses_jacs_systems', 'mcl1', 'MCL1_protein.pdb')) + pdb_filename = get_data_filename( + os.path.join("perses_jacs_systems", "mcl1", "MCL1_protein.pdb") + ) pdbfile = PDBFile(pdb_filename) # Delete hydrogens from terminal protein residues # TODO: Fix the input files so we don't need to do this modeller = Modeller(pdbfile.topology, pdbfile.positions) - residues = [residue for residue in modeller.topology.residues() if residue.name != 'UNL'] + residues = [ + residue for residue in modeller.topology.residues() if residue.name != "UNL" + ] termini_ids = [residues[0].id, residues[-1].id] - #hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name != 'UNL'] - hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.id in termini_ids] + # hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name != 'UNL'] + hs = [ + atom + for atom in modeller.topology.atoms() + if atom.element.symbol in ["H"] and atom.residue.id in termini_ids + ] modeller.delete(hs) modeller.addHydrogens() @@ -166,133 +196,187 @@ def test_barostat(self): system = generator.create_system(modeller.topology) # Check barostat is present - forces = { force.__class__.__name__ : force for force in system.getForces() } - assert 'MonteCarloBarostat' in forces.keys() + forces = {force.__class__.__name__: force for force in system.getForces()} + assert "MonteCarloBarostat" in forces.keys() # Check barostat parameters - force = forces['MonteCarloBarostat'] + force = forces["MonteCarloBarostat"] assert force.getDefaultPressure() == pressure assert force.getDefaultTemperature() == temperature assert force.getFrequency() == frequency - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) def test_create_with_template_generator(self, small_molecule_forcefield): """Test SystemGenerator creation with small molecule residue template generators""" # Create a generator that defines AMBER and small molecule force fields - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + ) # Create a generator that also has a database cache with tempfile.TemporaryDirectory() as tmpdirname: - cache = os.path.join(tmpdirname, 'db.json') + cache = os.path.join(tmpdirname, "db.json") # Create a new database file - generator = SystemGenerator(forcefields=self.amber_forcefields, - cache=cache, small_molecule_forcefield=small_molecule_forcefield) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + cache=cache, + small_molecule_forcefield=small_molecule_forcefield, + ) del generator # Reopen it (with cache still empty) - generator = SystemGenerator(forcefields=self.amber_forcefields, - cache=cache, small_molecule_forcefield=small_molecule_forcefield) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + cache=cache, + small_molecule_forcefield=small_molecule_forcefield, + ) del generator - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) def test_forcefield_default_kwargs(self, small_molecule_forcefield, test_systems): """Test that default forcefield kwargs work correctly""" from openmm import unit + forcefield_kwargs = dict() from openmmforcefields.generators import SystemGenerator for name, testsystem in test_systems.items(): print(testsystem) - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Create a SystemGenerator for this force field - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield, - forcefield_kwargs=forcefield_kwargs, - molecules=molecules) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + forcefield_kwargs=forcefield_kwargs, + molecules=molecules, + ) # Parameterize molecules for molecule in molecules: # Create non-periodic Topology nonperiodic_openmm_topology = molecule.to_topology().to_openmm() system = generator.create_system(nonperiodic_openmm_topology) - forces = {force.__class__.__name__: force for force in system.getForces()} - assert forces['NonbondedForce'].getNonbondedMethod() == openmm.NonbondedForce.NoCutoff, "Expected CutoffNonPeriodic, got {forces['NonbondedForce'].getNonbondedMethod()}" + forces = { + force.__class__.__name__: force for force in system.getForces() + } + assert ( + forces["NonbondedForce"].getNonbondedMethod() + == openmm.NonbondedForce.NoCutoff + ), "Expected CutoffNonPeriodic, got {forces['NonbondedForce'].getNonbondedMethod()}" # Create periodic Topology box_vectors = unit.Quantity(np.diag([30, 30, 30]), unit.angstrom) periodic_openmm_topology = copy.deepcopy(nonperiodic_openmm_topology) periodic_openmm_topology.setPeriodicBoxVectors(box_vectors) system = generator.create_system(periodic_openmm_topology) - forces = {force.__class__.__name__: force for force in system.getForces()} - assert forces['NonbondedForce'].getNonbondedMethod() == openmm.NonbondedForce.PME, "Expected LJPME, got {forces['NonbondedForce'].getNonbondedMethod()}" - - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) + forces = { + force.__class__.__name__: force for force in system.getForces() + } + assert ( + forces["NonbondedForce"].getNonbondedMethod() + == openmm.NonbondedForce.PME + ), "Expected LJPME, got {forces['NonbondedForce'].getNonbondedMethod()}" + + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) def test_forcefield_kwargs(self, small_molecule_forcefield, test_systems): """Test that forcefield_kwargs and nonbonded method specifications work correctly""" from openmm import unit - forcefield_kwargs = { 'hydrogenMass' : 4*unit.amu } + + forcefield_kwargs = {"hydrogenMass": 4 * unit.amu} from openmmforcefields.generators import SystemGenerator # Test exception is raised with pytest.raises(ValueError) as excinfo: # Not allowed to specify nonbondedMethod in forcefield_kwargs - generator = SystemGenerator(forcefield_kwargs={'nonbondedMethod':PME}) - assert "nonbondedMethod cannot be specified in forcefield_kwargs" in str(excinfo.value) + generator = SystemGenerator(forcefield_kwargs={"nonbondedMethod": PME}) + assert "nonbondedMethod cannot be specified in forcefield_kwargs" in str( + excinfo.value + ) for name, testsystem in test_systems.items(): print(testsystem) - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Create a SystemGenerator for this force field - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield, - forcefield_kwargs=forcefield_kwargs, - periodic_forcefield_kwargs={'nonbondedMethod': LJPME}, - nonperiodic_forcefield_kwargs={'nonbondedMethod': CutoffNonPeriodic}, - molecules=molecules) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + forcefield_kwargs=forcefield_kwargs, + periodic_forcefield_kwargs={"nonbondedMethod": LJPME}, + nonperiodic_forcefield_kwargs={"nonbondedMethod": CutoffNonPeriodic}, + molecules=molecules, + ) # Parameterize molecules for molecule in molecules: # Create non-periodic Topology nonperiodic_openmm_topology = molecule.to_topology().to_openmm() system = generator.create_system(nonperiodic_openmm_topology) - forces = {force.__class__.__name__: force for force in system.getForces()} - assert forces[ - 'NonbondedForce'].getNonbondedMethod() == openmm.NonbondedForce.CutoffNonPeriodic, "Expected CutoffNonPeriodic, got {forces['NonbondedForce'].getNonbondedMethod()}" + forces = { + force.__class__.__name__: force for force in system.getForces() + } + assert ( + forces["NonbondedForce"].getNonbondedMethod() + == openmm.NonbondedForce.CutoffNonPeriodic + ), "Expected CutoffNonPeriodic, got {forces['NonbondedForce'].getNonbondedMethod()}" # Create periodic Topology box_vectors = unit.Quantity(np.diag([30, 30, 30]), unit.angstrom) periodic_openmm_topology = copy.deepcopy(nonperiodic_openmm_topology) periodic_openmm_topology.setPeriodicBoxVectors(box_vectors) system = generator.create_system(periodic_openmm_topology) - forces = {force.__class__.__name__: force for force in system.getForces()} - assert forces[ - 'NonbondedForce'].getNonbondedMethod() == openmm.NonbondedForce.LJPME, "Expected LJPME, got {forces['NonbondedForce'].getNonbondedMethod()}" - - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) - def test_parameterize_molecules_from_creation(self, test_systems, small_molecule_forcefield): + forces = { + force.__class__.__name__: force for force in system.getForces() + } + assert ( + forces["NonbondedForce"].getNonbondedMethod() + == openmm.NonbondedForce.LJPME + ), "Expected LJPME, got {forces['NonbondedForce'].getNonbondedMethod()}" + + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) + def test_parameterize_molecules_from_creation( + self, test_systems, small_molecule_forcefield + ): """Test that SystemGenerator can parameterize pre-specified molecules in vacuum""" for name, testsystem in test_systems.items(): print(testsystem) - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Create a SystemGenerator for this force field - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield, - molecules=molecules) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + molecules=molecules, + ) # Parameterize molecules for molecule in molecules: @@ -304,40 +388,54 @@ def test_parameterize_molecules_from_creation(self, test_systems, small_molecule with Timer() as t2: system = generator.create_system(openmm_topology) assert system.getNumParticles() == molecule.n_atoms - assert (t2.interval() < t1.interval()) - - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) - def test_parameterize_molecules_specified_during_create_system(self, test_systems, small_molecule_forcefield): + assert t2.interval() < t1.interval() + + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) + def test_parameterize_molecules_specified_during_create_system( + self, test_systems, small_molecule_forcefield + ): """Test that SystemGenerator can parameterize molecules specified during create_system""" for name, testsystem in test_systems.items(): - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Create a SystemGenerator for this force field - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + ) # Parameterize molecules for molecule in molecules: openmm_topology = molecule.to_topology().to_openmm() # Specify molecules during system creation - system = generator.create_system(openmm_topology, molecules=molecules) - - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) + generator.create_system(openmm_topology, molecules=molecules) + + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) def test_add_molecules(self, test_systems, small_molecule_forcefield): """Test that Molecules can be added to SystemGenerator later""" - # Create a SystemGenerator for this force field - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield) + # Create a SystemGenerator for this force field + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + ) # Add molecules for each test system separately for name, testsystem in test_systems.items(): - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Add molecules generator.add_molecules(molecules) @@ -352,26 +450,34 @@ def test_add_molecules(self, test_systems, small_molecule_forcefield): with Timer() as t2: system = generator.create_system(openmm_topology) assert system.getNumParticles() == molecule.n_atoms - assert (t2.interval() < t1.interval()) - - @pytest.mark.parametrize("small_molecule_forcefield", [ - 'gaff-2.11', - 'openff-2.0.0', - pytest.param('espaloma-0.3.2', marks=pytest.mark.espaloma)]) + assert t2.interval() < t1.interval() + + @pytest.mark.parametrize( + "small_molecule_forcefield", + [ + "gaff-2.11", + "openff-2.0.0", + pytest.param("espaloma-0.3.2", marks=pytest.mark.espaloma), + ], + ) def test_cache(self, test_systems, small_molecule_forcefield): """Test that SystemGenerator correctly manages a cache""" - timing = dict() # timing[(small_molecule_forcefield, smiles)] is the time (in seconds) to parameterize molecule the first time + timing = ( + dict() + ) # timing[(small_molecule_forcefield, smiles)] is the time (in seconds) to parameterize molecule the first time with tempfile.TemporaryDirectory() as tmpdirname: # Create a single shared cache for all force fields - cache = os.path.join(tmpdirname, 'db.json') + cache = os.path.join(tmpdirname, "db.json") # Test that we can parameterize all molecules for all test systems - # Create a SystemGenerator - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield, - cache=cache) + # Create a SystemGenerator + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + cache=cache, + ) # Add molecules for each test system separately for name, testsystem in test_systems.items(): - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # Add molecules generator.add_molecules(molecules) @@ -382,17 +488,21 @@ def test_cache(self, test_systems, small_molecule_forcefield): system = generator.create_system(openmm_topology) assert system.getNumParticles() == molecule.n_atoms # Record time - timing[(small_molecule_forcefield, molecule.to_smiles())] = timer.interval() + timing[ + (small_molecule_forcefield, molecule.to_smiles()) + ] = timer.interval() # Molecules should now be cached; test timing is faster the second time # Test that we can parameterize all molecules for all test systems # Create a SystemGenerator - generator = SystemGenerator(forcefields=self.amber_forcefields, - small_molecule_forcefield=small_molecule_forcefield, - cache=cache) + generator = SystemGenerator( + forcefields=self.amber_forcefields, + small_molecule_forcefield=small_molecule_forcefield, + cache=cache, + ) # Add molecules for each test system separately for name, testsystem in test_systems.items(): - molecules = testsystem['molecules'] + molecules = testsystem["molecules"] # We don't need to add molecules that are already defined in the cache # Parameterize molecules @@ -407,28 +517,36 @@ def test_complex(self, test_systems): for name, testsystem in test_systems.items(): from openmm import unit - print(f'Testing parameterization of {name} in vacuum') - molecules = testsystem['molecules'] + print(f"Testing parameterization of {name} in vacuum") + molecules = testsystem["molecules"] # Select a complex from the set ligand_index = 0 - complex_structure = testsystem['complex_structures'][ligand_index] + complex_structure = testsystem["complex_structures"][ligand_index] openmm_topology = complex_structure.topology - cache = os.path.join(get_data_filename(os.path.join('perses_jacs_systems', name)), 'cache.json') + cache = os.path.join( + get_data_filename(os.path.join("perses_jacs_systems", name)), + "cache.json", + ) # Create a system in vacuum - generator = SystemGenerator(forcefields=self.amber_forcefields, - molecules=molecules, cache=cache) + generator = SystemGenerator( + forcefields=self.amber_forcefields, molecules=molecules, cache=cache + ) system = generator.create_system(openmm_topology) assert system.getNumParticles() == len(complex_structure.atoms) # Create solvated structure modeller = Modeller(complex_structure.topology, complex_structure.positions) - modeller.addSolvent(generator.forcefield, padding=0*unit.angstroms, ionicStrength=300*unit.millimolar) + modeller.addSolvent( + generator.forcefield, + padding=0 * unit.angstroms, + ionicStrength=300 * unit.millimolar, + ) # Create a system with solvent and ions system = generator.create_system(modeller.topology) assert system.getNumParticles() == len(list(modeller.topology.atoms())) - with open('test.pdb', 'w') as outfile: + with open("test.pdb", "w") as outfile: PDBFile.writeFile(modeller.topology, modeller.positions, outfile) diff --git a/openmmforcefields/tests/test_template_generators.py b/openmmforcefields/tests/test_template_generators.py index 984b4e3f..1f499019 100644 --- a/openmmforcefields/tests/test_template_generators.py +++ b/openmmforcefields/tests/test_template_generators.py @@ -1,13 +1,12 @@ import copy import logging import os -import pytest import tempfile import unittest -import pytest import numpy as np import openmm +import pytest from openff.toolkit.topology import Molecule from openff.toolkit.typing.engines.smirnoff import ForceField as OFFForceField from openff.units import unit as OFFUnit @@ -22,16 +21,21 @@ _logger = logging.getLogger("openmmforcefields.tests.test_template_generators") -CI = ('CI' in os.environ) +CI = "CI" in os.environ ################################################################################ # Tests ################################################################################ + class TestGAFFTemplateGenerator(unittest.TestCase): TEMPLATE_GENERATOR = GAFFTemplateGenerator - amber_forcefields = ['amber/protein.ff14SB.xml', 'amber/tip3p_standard.xml', 'amber/tip3p_HFE_multivalent.xml'] + amber_forcefields = [ + "amber/protein.ff14SB.xml", + "amber/tip3p_standard.xml", + "amber/tip3p_HFE_multivalent.xml", + ] def filter_molecules(self, molecules): """ @@ -51,7 +55,7 @@ def filter_molecules(self, molecules): # TODO: Eliminate molecules without fully-specified stereochemistry # Select some small molecules for fast testing MAX_ATOMS = 40 - molecules = [ molecule for molecule in molecules if molecule.n_atoms < MAX_ATOMS ] + molecules = [molecule for molecule in molecules if molecule.n_atoms < MAX_ATOMS] # Cut down number of tests for continuous integration MAX_MOLECULES = 50 if not CI else 4 molecules = molecules[:MAX_MOLECULES] @@ -64,16 +68,18 @@ def setUp(self): # TODO: Harmonize with test_system_generator.py infrastructure # Read test molecules - filename = get_data_filename("minidrugbank/MiniDrugBank-without-unspecified-stereochemistry.sdf") + filename = get_data_filename( + "minidrugbank/MiniDrugBank-without-unspecified-stereochemistry.sdf" + ) molecules = Molecule.from_file(filename, allow_undefined_stereo=True) # DEBUG: Insert acetone perturbed from planarity as first test molecule, since it fails quickly if something is wrong - molecule = Molecule.from_smiles('C=O') + molecule = Molecule.from_smiles("C=O") molecule.generate_conformers(n_conformers=1) uses_old_api = hasattr(molecule.atoms[0], "element") - molecule.conformers[0][0,0] += ensure_quantity( + molecule.conformers[0][0, 0] += ensure_quantity( unit.Quantity(0.1, unit.angstroms), "openmm" if uses_old_api else "openff", ) @@ -86,7 +92,8 @@ def setUp(self): # Suppress DEBUG logging from various packages import logging - for name in ['parmed', 'matplotlib']: + + for name in ["parmed", "matplotlib"]: logging.getLogger(name).setLevel(logging.WARNING) def test_version(self): @@ -94,14 +101,20 @@ def test_version(self): for forcefield in GAFFTemplateGenerator.INSTALLED_FORCEFIELDS: generator = GAFFTemplateGenerator(forcefield=forcefield) import re - result = re.match(r'^gaff-(?P\d+)\.(?P\d+)$', forcefield) + + result = re.match( + r"^gaff-(?P\d+)\.(?P\d+)$", forcefield + ) assert generator.forcefield == forcefield - assert generator.gaff_version == result['major_version'] + '.' + result['minor_version'] - assert generator.gaff_major_version == result['major_version'] - assert generator.gaff_minor_version == result['minor_version'] - assert generator.gaff_dat_filename.endswith(forcefield + '.dat') + assert ( + generator.gaff_version + == result["major_version"] + "." + result["minor_version"] + ) + assert generator.gaff_major_version == result["major_version"] + assert generator.gaff_minor_version == result["minor_version"] + assert generator.gaff_dat_filename.endswith(forcefield + ".dat") assert os.path.exists(generator.gaff_dat_filename) - assert generator.gaff_xml_filename.endswith(forcefield + '.xml') + assert generator.gaff_xml_filename.endswith(forcefield + ".xml") assert os.path.exists(generator.gaff_xml_filename) def test_create(self): @@ -112,7 +125,7 @@ def test_create(self): generator = self.TEMPLATE_GENERATOR(molecules=self.molecules) # Create a generator that also has a database cache with tempfile.TemporaryDirectory() as tmpdirname: - cache = os.path.join(tmpdirname, 'db.json') + cache = os.path.join(tmpdirname, "db.json") # Create a new database file generator = self.TEMPLATE_GENERATOR(molecules=self.molecules, cache=cache) del generator @@ -137,7 +150,7 @@ def test_add_molecules(self): system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff) except ValueError as e: # Exception 'No template found...' is expected - assert str(e).startswith('No template found') + assert str(e).startswith("No template found") # Now add the molecule to the generator and ensure parameterization passes generator.add_molecules(molecule) @@ -147,6 +160,7 @@ def test_add_molecules(self): except Exception as e: print(forcefield._atomTypes.keys()) from openff.units.openmm import ensure_quantity + PDBFile.writeFile( openmm_topology, ensure_quantity(molecule.conformers[0], "openmm"), @@ -178,10 +192,13 @@ def charges_from_system(self, system): """ from openmm import unit + system_charges = list() - forces = { force.__class__.__name__ : force for force in system.getForces() } + forces = {force.__class__.__name__: force for force in system.getForces()} for particle_index in range(system.getNumParticles()): - charge, sigma, epsilon = forces['NonbondedForce'].getParticleParameters(particle_index) + charge, sigma, epsilon = forces["NonbondedForce"].getParticleParameters( + particle_index + ) system_charges.append(charge / unit.elementary_charge) system_charges = np.array(system_charges) @@ -212,15 +229,16 @@ def charges_are_equal(self, system, molecule): # type(molecule.partial_charges) depends on the toolkit version molecule_charges: np.ndarray = ensure_quantity( - molecule.partial_charges, "openff", + molecule.partial_charges, + "openff", ).m_as(unit.elementary_charge) result = np.allclose(system_charges, molecule_charges) if not result: - _logger.info('Charges are not equal') - _logger.info(f'system charges : {system_charges}') - _logger.info(f'molecule charges: {molecule_charges}') + _logger.info("Charges are not equal") + _logger.info(f"system charges : {system_charges}") + _logger.info(f"molecule charges: {molecule_charges}") return result @@ -235,16 +253,21 @@ def test_charge(self): # Check that parameterizing a molecule using user-provided charges produces expected charges from openmm import unit + molecule = self.molecules[0] # Ensure partial charges are initially zero - assert (molecule.partial_charges is None) or np.all(molecule.partial_charges / unit.elementary_charge == 0) + assert (molecule.partial_charges is None) or np.all( + molecule.partial_charges / unit.elementary_charge == 0 + ) # Add the molecule generator.add_molecules(molecule) # Create the System openmm_topology = molecule.to_topology().to_openmm() system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff) # Ensure charges are no longer zero - assert not np.all(self.charges_from_system(system) == 0), "System has zero charges despite molecule not being charged" + assert not np.all( + self.charges_from_system(system) == 0 + ), "System has zero charges despite molecule not being charged" def test_charge_from_molecules(self): """Test that user-specified partial charges are used if requested""" @@ -273,21 +296,23 @@ def test_charge_from_molecules(self): unit.elementary_charge, ) - assert (molecule.partial_charges is not None) + assert molecule.partial_charges is not None assert not np.all(ensure_quantity(molecule.partial_charges, "openff").m == 0) generator.add_molecules(molecule) - system = forcefield.createSystem(molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff) + system = forcefield.createSystem( + molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff + ) assert self.charges_are_equal(system, molecule) def test_debug_ffxml(self): """Test that debug ffxml file is created when requested""" with tempfile.TemporaryDirectory() as tmpdirname: - debug_ffxml_filename = os.path.join(tmpdirname, 'molecule.ffxml') - cache = os.path.join(tmpdirname, 'db.json') + debug_ffxml_filename = os.path.join(tmpdirname, "molecule.ffxml") + cache = os.path.join(tmpdirname, "db.json") # Create a generator that only knows about one molecule molecule = self.molecules[0] generator = self.TEMPLATE_GENERATOR(molecules=molecule, cache=cache) @@ -308,10 +333,12 @@ def test_debug_ffxml(self): assert os.path.exists(debug_ffxml_filename) # Ensure we can use that file to create a new force field forcefield_from_ffxml = ForceField() - if hasattr(generator, 'gaff_xml_filename'): + if hasattr(generator, "gaff_xml_filename"): forcefield_from_ffxml.loadFile(generator.gaff_xml_filename) forcefield_from_ffxml.loadFile(debug_ffxml_filename) - system2 = forcefield_from_ffxml.createSystem(openmm_topology, nonbondedMethod=NoCutoff) + system2 = forcefield_from_ffxml.createSystem( + openmm_topology, nonbondedMethod=NoCutoff + ) # TODO: Test that systems are equivalent assert system.getNumParticles() == system2.getNumParticles() @@ -319,7 +346,7 @@ def test_cache(self): """Test template generator cache capability""" with tempfile.TemporaryDirectory() as tmpdirname: # Create a generator that also has a database cache - cache = os.path.join(tmpdirname, 'db.json') + cache = os.path.join(tmpdirname, "db.json") generator = self.TEMPLATE_GENERATOR(molecules=self.molecules, cache=cache) # Create a ForceField forcefield = ForceField() @@ -343,13 +370,15 @@ def check_cache(generator, n_expected): Number of expected records """ from tinydb import TinyDB + db = TinyDB(generator._cache) table = db.table(generator._database_table_name) db_entries = table.all() db.close() n_entries = len(db_entries) - assert (n_entries == n_expected), \ - f"Expected {n_expected} entries but database has {n_entries}\n db contents: {db_entries}" + assert ( + n_entries == n_expected + ), f"Expected {n_expected} entries but database has {n_entries}\n db contents: {db_entries}" check_cache(generator, len(self.molecules)) @@ -357,7 +386,7 @@ def check_cache(generator, n_expected): del forcefield, generator # Create a generator that also uses the database cache but has no molecules - print('Creating new generator with just cache...') + print("Creating new generator with just cache...") generator = self.TEMPLATE_GENERATOR(cache=cache) # Check database still contains the molecules we expect check_cache(generator, len(self.molecules)) @@ -375,16 +404,17 @@ def test_add_solvent(self): # Select a molecule to add solvent around from openff.units.openmm import ensure_quantity from openmm import unit + molecule = self.molecules[0] openmm_topology = molecule.to_topology().to_openmm() openmm_positions = ensure_quantity(molecule.conformers[0], "openmm") # Try adding solvent without residue template generator; this will fail - forcefield = ForceField('tip3p.xml') + forcefield = ForceField("tip3p.xml") # Add solvent to a system containing a small molecule modeller = Modeller(openmm_topology, openmm_positions) try: - modeller.addSolvent(forcefield, model='tip3p', padding=6.0*unit.angstroms) - except ValueError as e: + modeller.addSolvent(forcefield, model="tip3p", padding=6.0 * unit.angstroms) + except ValueError: pass # Create a generator that knows about a few molecules @@ -393,40 +423,49 @@ def test_add_solvent(self): forcefield.registerTemplateGenerator(generator.generator) # Add solvent to a system containing a small molecule # This should succeed - modeller.addSolvent(forcefield, model='tip3p', padding=6.0*unit.angstroms) + modeller.addSolvent(forcefield, model="tip3p", padding=6.0 * unit.angstroms) def test_jacs_ligands(self): """Use template generator to parameterize the Schrodinger JACS set of ligands""" jacs_systems = { - #'bace' : { 'prefix' : 'Bace' }, - #'cdk2' : { 'prefix' : 'CDK2' }, - 'jnk1' : { 'prefix' : 'Jnk1' }, - 'mcl1' : { 'prefix' : 'MCL1' }, - #'p38' : { 'prefix' : 'p38' }, - 'ptp1b' : { 'prefix' : 'PTP1B' }, - 'thrombin' : { 'prefix' : 'Thrombin' }, - #'tyk2' : { 'prefix' : 'Tyk2' }, + # 'bace' : { 'prefix' : 'Bace' }, + # 'cdk2' : { 'prefix' : 'CDK2' }, + "jnk1": {"prefix": "Jnk1"}, + "mcl1": {"prefix": "MCL1"}, + # 'p38' : { 'prefix' : 'p38' }, + "ptp1b": {"prefix": "PTP1B"}, + "thrombin": {"prefix": "Thrombin"}, + # 'tyk2' : { 'prefix' : 'Tyk2' }, } for system_name in jacs_systems: - prefix = jacs_systems[system_name]['prefix'] + prefix = jacs_systems[system_name]["prefix"] # Load molecules - ligand_sdf_filename = get_data_filename(os.path.join('perses_jacs_systems', system_name, prefix + '_ligands.sdf')) - print(f'Reading molecules from {ligand_sdf_filename} ...') - molecules = Molecule.from_file(ligand_sdf_filename, allow_undefined_stereo=True) + ligand_sdf_filename = get_data_filename( + os.path.join( + "perses_jacs_systems", system_name, prefix + "_ligands.sdf" + ) + ) + print(f"Reading molecules from {ligand_sdf_filename} ...") + molecules = Molecule.from_file( + ligand_sdf_filename, allow_undefined_stereo=True + ) # Ensure this is a list try: - nmolecules = len(molecules) + len(molecules) except TypeError: molecules = [molecules] - print(f'Read {len(molecules)} molecules from {ligand_sdf_filename}') - #molecules = self.filter_molecules(molecules) + print(f"Read {len(molecules)} molecules from {ligand_sdf_filename}") + # molecules = self.filter_molecules(molecules) MAX_MOLECULES = len(molecules) if not CI else 3 molecules = molecules[:MAX_MOLECULES] - print(f'{len(molecules)} molecules remain after filtering') + print(f"{len(molecules)} molecules remain after filtering") # Create template generator with local cache - cache = os.path.join(get_data_filename(os.path.join('perses_jacs_systems', system_name)), 'cache.json') + cache = os.path.join( + get_data_filename(os.path.join("perses_jacs_systems", system_name)), + "cache.json", + ) generator = self.TEMPLATE_GENERATOR(molecules=molecules, cache=cache) # Create a ForceField @@ -435,7 +474,7 @@ def test_jacs_ligands(self): forcefield.registerTemplateGenerator(generator.generator) # Parameterize all molecules - print(f'Caching all molecules for {system_name} at {cache} ...') + print(f"Caching all molecules for {system_name} at {cache} ...") n_success = 0 n_failure = 0 for molecule in molecules: @@ -446,44 +485,59 @@ def test_jacs_ligands(self): except Exception as e: n_failure += 1 print(e) - print(f'{n_failure}/{n_success+n_failure} ligands failed to parameterize for {system_name}') + print( + f"{n_failure}/{n_success+n_failure} ligands failed to parameterize for {system_name}" + ) def test_jacs_complexes(self): """Use template generator to parameterize the Schrodinger JACS set of complexes""" # TODO: Uncomment working systems when we have cleaned up the input files jacs_systems = { - #'bace' : { 'prefix' : 'Bace' }, - #'cdk2' : { 'prefix' : 'CDK2' }, - #'jnk1' : { 'prefix' : 'Jnk1' }, - 'mcl1' : { 'prefix' : 'MCL1' }, - #'p38' : { 'prefix' : 'p38' }, - #'ptp1b' : { 'prefix' : 'PTP1B' }, - #'thrombin' : { 'prefix' : 'Thrombin' }, - #'tyk2' : { 'prefix' : 'Tyk2' }, + # 'bace' : { 'prefix' : 'Bace' }, + # 'cdk2' : { 'prefix' : 'CDK2' }, + # 'jnk1' : { 'prefix' : 'Jnk1' }, + "mcl1": {"prefix": "MCL1"}, + # 'p38' : { 'prefix' : 'p38' }, + # 'ptp1b' : { 'prefix' : 'PTP1B' }, + # 'thrombin' : { 'prefix' : 'Thrombin' }, + # 'tyk2' : { 'prefix' : 'Tyk2' }, } for system_name in jacs_systems: - prefix = jacs_systems[system_name]['prefix'] + prefix = jacs_systems[system_name]["prefix"] # Read molecules - ligand_sdf_filename = get_data_filename(os.path.join('perses_jacs_systems', system_name, prefix + '_ligands.sdf')) - print(f'Reading molecules from {ligand_sdf_filename} ...') - molecules = Molecule.from_file(ligand_sdf_filename, allow_undefined_stereo=True) + ligand_sdf_filename = get_data_filename( + os.path.join( + "perses_jacs_systems", system_name, prefix + "_ligands.sdf" + ) + ) + print(f"Reading molecules from {ligand_sdf_filename} ...") + molecules = Molecule.from_file( + ligand_sdf_filename, allow_undefined_stereo=True + ) try: - nmolecules = len(molecules) + len(molecules) except TypeError: molecules = [molecules] - print(f'Read {len(molecules)} molecules from {ligand_sdf_filename}') + print(f"Read {len(molecules)} molecules from {ligand_sdf_filename}") # Read ParmEd Structures import parmed from openmm import unit - protein_pdb_filename = get_data_filename(os.path.join('perses_jacs_systems', system_name, prefix + '_protein.pdb')) - print(f'Reading protein from {protein_pdb_filename} ...') - #protein_structure = parmed.load_file(protein_pdb_filename) # NOTE: This mis-interprets distorted geometry and sequentially-numbered residues that span chain breaks + + protein_pdb_filename = get_data_filename( + os.path.join( + "perses_jacs_systems", system_name, prefix + "_protein.pdb" + ) + ) + print(f"Reading protein from {protein_pdb_filename} ...") + # protein_structure = parmed.load_file(protein_pdb_filename) # NOTE: This mis-interprets distorted geometry and sequentially-numbered residues that span chain breaks pdbfile = PDBFile(protein_pdb_filename) - protein_structure = parmed.openmm.load_topology(pdbfile.topology, xyz=pdbfile.positions.value_in_unit(unit.angstroms)) + protein_structure = parmed.openmm.load_topology( + pdbfile.topology, xyz=pdbfile.positions.value_in_unit(unit.angstroms) + ) ligand_structures = parmed.load_file(ligand_sdf_filename) try: - nmolecules = len(ligand_structures) + len(ligand_structures) except TypeError: ligand_structures = [ligand_structures] assert len(ligand_structures) == len(molecules) @@ -492,13 +546,19 @@ def test_jacs_complexes(self): MAX_MOLECULES = 6 if not CI else 3 molecules = molecules[:MAX_MOLECULES] ligand_structures = ligand_structures[:MAX_MOLECULES] - print(f'{len(molecules)} molecules remain after filtering') + print(f"{len(molecules)} molecules remain after filtering") # Create complexes - complex_structures = [ (protein_structure + ligand_structure) for ligand_structure in ligand_structures ] + complex_structures = [ + (protein_structure + ligand_structure) + for ligand_structure in ligand_structures + ] # Create template generator with local cache - cache = os.path.join(get_data_filename(os.path.join('perses_jacs_systems', system_name)), 'cache.json') + cache = os.path.join( + get_data_filename(os.path.join("perses_jacs_systems", system_name)), + "cache.json", + ) generator = self.TEMPLATE_GENERATOR(molecules=molecules, cache=cache) # Create a ForceField @@ -507,29 +567,47 @@ def test_jacs_complexes(self): forcefield.registerTemplateGenerator(generator.generator) # Parameterize all complexes - print(f'Caching all molecules for {system_name} at {cache} ...') + print(f"Caching all molecules for {system_name} at {cache} ...") for ligand_index, complex_structure in enumerate(complex_structures): - openmm_topology = complex_structure.topology molecule = molecules[ligand_index] # Delete hydrogens from terminal protein residues # TODO: Fix the input files so we don't need to do this from openmm import app - modeller = app.Modeller(complex_structure.topology, complex_structure.positions) - residues = [residue for residue in modeller.topology.residues() if residue.name != 'UNL'] + + modeller = app.Modeller( + complex_structure.topology, complex_structure.positions + ) + residues = [ + residue + for residue in modeller.topology.residues() + if residue.name != "UNL" + ] termini_ids = [residues[0].id, residues[-1].id] - #hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name != 'UNL'] - hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.id in termini_ids] + # hs = [atom for atom in modeller.topology.atoms() if atom.element.symbol in ['H'] and atom.residue.name != 'UNL'] + hs = [ + atom + for atom in modeller.topology.atoms() + if atom.element.symbol in ["H"] and atom.residue.id in termini_ids + ] modeller.delete(hs) modeller.addHydrogens(forcefield) # Parameterize protein:ligand complex in vacuum - print(f' Parameterizing {system_name} : {molecule.to_smiles()} in vacuum...') + print( + f" Parameterizing {system_name} : {molecule.to_smiles()} in vacuum..." + ) forcefield.createSystem(modeller.topology, nonbondedMethod=NoCutoff) # Parameterize protein:ligand complex in solvent - print(f' Parameterizing {system_name} : {molecule.to_smiles()} in explicit solvent...') - modeller.addSolvent(forcefield, padding=0*unit.angstroms, ionicStrength=300*unit.millimolar) + print( + f" Parameterizing {system_name} : {molecule.to_smiles()} in explicit solvent..." + ) + modeller.addSolvent( + forcefield, + padding=0 * unit.angstroms, + ionicStrength=300 * unit.millimolar, + ) forcefield.createSystem(modeller.topology, nonbondedMethod=PME) def test_parameterize(self): @@ -543,10 +621,12 @@ def test_parameterize(self): if "opc" in small_molecule_forcefield: continue - print(f'Testing {small_molecule_forcefield}') + print(f"Testing {small_molecule_forcefield}") # Create a generator that knows about a few molecules # TODO: Should the generator also load the appropriate force field files into the ForceField object? - generator = self.TEMPLATE_GENERATOR(molecules=self.molecules, forcefield=small_molecule_forcefield) + generator = self.TEMPLATE_GENERATOR( + molecules=self.molecules, forcefield=small_molecule_forcefield + ) # Check that we have loaded the right force field assert generator.forcefield == small_molecule_forcefield # Create a ForceField with the appropriate small molecule force field @@ -556,21 +636,26 @@ def test_parameterize(self): # Parameterize some molecules from openmmforcefields.utils import Timer + for molecule in self.molecules: openmm_topology = molecule.to_topology().to_openmm() with Timer() as t1: - system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff) + system = forcefield.createSystem( + openmm_topology, nonbondedMethod=NoCutoff + ) assert system.getNumParticles() == molecule.n_atoms # Molecule should now be cached with Timer() as t2: - system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff) + system = forcefield.createSystem( + openmm_topology, nonbondedMethod=NoCutoff + ) assert system.getNumParticles() == molecule.n_atoms - assert (t2.interval() < t1.interval()) + assert t2.interval() < t1.interval() def test_multiple_registration(self): """Test registering the template generator with multiple force fields""" generator = self.TEMPLATE_GENERATOR(molecules=self.molecules) - NUM_FORCEFIELDS = 2 # number of force fields to test + NUM_FORCEFIELDS = 2 # number of force fields to test forcefields = list() for index in range(NUM_FORCEFIELDS): forcefield = ForceField() @@ -607,19 +692,33 @@ def compute_energy(system, positions): system = copy.deepcopy(system) for index, force in enumerate(system.getForces()): force.setForceGroup(index) - platform = openmm.Platform.getPlatformByName('Reference') + platform = openmm.Platform.getPlatformByName("Reference") integrator = openmm.VerletIntegrator(0.001) context = openmm.Context(system, integrator, platform) context.setPositions(positions) openmm_energy = { - 'total' : context.getState(getEnergy=True).getPotentialEnergy(), - 'components' : { system.getForce(index).__class__.__name__ : context.getState(getEnergy=True, groups=(1 << index)).getPotentialEnergy() for index in range(system.getNumForces()) }, - } + "total": context.getState(getEnergy=True).getPotentialEnergy(), + "components": { + system.getForce(index) + .__class__.__name__: context.getState( + getEnergy=True, groups=(1 << index) + ) + .getPotentialEnergy() + for index in range(system.getNumForces()) + }, + } openmm_forces = { - 'total' : context.getState(getForces=True).getForces(asNumpy=True), - 'components' : { system.getForce(index).__class__.__name__ : context.getState(getForces=True, groups=(1 << index)).getForces(asNumpy=True) for index in range(system.getNumForces()) }, - } + "total": context.getState(getForces=True).getForces(asNumpy=True), + "components": { + system.getForce(index) + .__class__.__name__: context.getState( + getForces=True, groups=(1 << index) + ) + .getForces(asNumpy=True) + for index in range(system.getNumForces()) + }, + } del context, integrator return openmm_energy, openmm_forces @@ -656,65 +755,90 @@ def compare_energies(cls, molecule, template_generated_system, reference_system) from openmm import unit def write_xml(filename, system): - with open(filename, 'w') as outfile: - print(f'Writing {filename}...') + with open(filename, "w") as outfile: + print(f"Writing {filename}...") outfile.write(openmm.XmlSerializer.serialize(system)) # DEBUG print(openmm.XmlSerializer.serialize(system)) # Make sure both systems contain the same energy components - reference_components = set(reference_energy['components']) - template_components = set(template_energy['components']) + reference_components = set(reference_energy["components"]) + template_components = set(template_energy["components"]) if len(reference_components.difference(template_components)) > 0: - raise Exception(f'Reference system contains components {reference_components.difference(template_components)} that do not appear in template-generated system.') + raise Exception( + f"Reference system contains components {reference_components.difference(template_components)} that do not appear in template-generated system." + ) if len(template_components.difference(reference_components)) > 0: - raise Exception(f'Template-generated system contains components {template_components.difference(reference_components)} that do not appear in reference system.') + raise Exception( + f"Template-generated system contains components {template_components.difference(reference_components)} that do not appear in reference system." + ) components = reference_components # Compare energies ENERGY_DEVIATION_TOLERANCE = 1.0e-2 * unit.kilocalories_per_mole - delta = (template_energy['total'] - reference_energy['total']) + delta = template_energy["total"] - reference_energy["total"] if abs(delta) > ENERGY_DEVIATION_TOLERANCE: # Show breakdown by components - print('Energy components:') - print(f"{'component':24} {'Template (kcal/mol)':>20} {'Reference (kcal/mol)':>20}") + print("Energy components:") + print( + f"{'component':24} {'Template (kcal/mol)':>20} {'Reference (kcal/mol)':>20}" + ) for key in components: - reference_component_energy = reference_energy['components'][key] - template_component_energy = template_energy['components'][key] - print(f'{key:24} {(template_component_energy/unit.kilocalories_per_mole):20.3f} {(reference_component_energy/unit.kilocalories_per_mole):20.3f} kcal/mol') - print(f'{"TOTAL":24} {(template_energy["total"]/unit.kilocalories_per_mole):20.3f} {(reference_energy["total"]/unit.kilocalories_per_mole):20.3f} kcal/mol') - write_xml('reference_system.xml', reference_system) - write_xml('template_system.xml', template_system) # What's this? This variable does not exist - raise Exception(f'Energy deviation for {molecule.to_smiles()} ({delta/unit.kilocalories_per_mole} kcal/mol) exceeds threshold ({ENERGY_DEVIATION_TOLERANCE})') + reference_component_energy = reference_energy["components"][key] + template_component_energy = template_energy["components"][key] + print( + f"{key:24} {(template_component_energy/unit.kilocalories_per_mole):20.3f} {(reference_component_energy/unit.kilocalories_per_mole):20.3f} kcal/mol" + ) + print( + f'{"TOTAL":24} {(template_energy["total"]/unit.kilocalories_per_mole):20.3f} {(reference_energy["total"]/unit.kilocalories_per_mole):20.3f} kcal/mol' + ) + write_xml("reference_system.xml", reference_system) + + # What's this? This variable does not exist + write_xml( + "template_system.xml", + template_system, # noqa + ) + raise Exception( + f"Energy deviation for {molecule.to_smiles()} ({delta/unit.kilocalories_per_mole} kcal/mol) exceeds threshold ({ENERGY_DEVIATION_TOLERANCE})" + ) # Compare forces def norm(x): N = x.shape[0] - return np.sqrt((1.0/N) * (x**2).sum()) + return np.sqrt((1.0 / N) * (x**2).sum()) + def relative_deviation(x, y): FORCE_UNIT = unit.kilocalories_per_mole / unit.angstroms - if hasattr(x, 'value_in_unit'): + if hasattr(x, "value_in_unit"): x = x / FORCE_UNIT - if hasattr(y, 'value_in_unit'): + if hasattr(y, "value_in_unit"): y = y / FORCE_UNIT if norm(y) > 0: - return norm(x-y) / np.sqrt(norm(x)**2 + norm(y)**2) + return norm(x - y) / np.sqrt(norm(x) ** 2 + norm(y) ** 2) else: return 0 RELATIVE_FORCE_DEVIATION_TOLERANCE = 1.0e-5 - relative_force_deviation = relative_deviation(template_forces['total'], reference_forces['total']) + relative_force_deviation = relative_deviation( + template_forces["total"], reference_forces["total"] + ) if relative_force_deviation > RELATIVE_FORCE_DEVIATION_TOLERANCE: # Show breakdown by components - print('Force components:') + print("Force components:") print(f"{'component':24} {'relative deviation':>24}") for key in components: - print(f"{key:24} {relative_deviation(template_forces['components'][key], reference_forces['components'][key]):24.10f}") + print( + f"{key:24} {relative_deviation(template_forces['components'][key], reference_forces['components'][key]):24.10f}" + ) print(f'{"TOTAL":24} {relative_force_deviation:24.10f}') - write_xml('system-smirnoff.xml', reference_system) - write_xml('openmm-smirnoff.xml', template_generated_system) - raise Exception(f'Relative force deviation for {molecule.to_smiles()} ({relative_force_deviation}) exceeds threshold ({RELATIVE_FORCE_DEVIATION_TOLERANCE})') + write_xml("system-smirnoff.xml", reference_system) + write_xml("openmm-smirnoff.xml", template_generated_system) + raise Exception( + f"Relative force deviation for {molecule.to_smiles()} ({relative_force_deviation}) exceeds threshold ({RELATIVE_FORCE_DEVIATION_TOLERANCE})" + ) + class TestSMIRNOFFTemplateGenerator(TestGAFFTemplateGenerator): TEMPLATE_GENERATOR = SMIRNOFFTemplateGenerator @@ -746,7 +870,7 @@ def propagate_dynamics(self, molecule, system): timestep = 1.0 * unit.femtoseconds nsteps = 100 integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) - platform = openmm.Platform.getPlatformByName('Reference') + platform = openmm.Platform.getPlatformByName("Reference") context = openmm.Context(system, integrator, platform) context.setPositions(ensure_quantity(molecule.conformers[0], "openmm")) integrator.step(nsteps) @@ -759,7 +883,6 @@ def propagate_dynamics(self, molecule, system): "openmm" if uses_old_api else "openff", ) - del context, integrator return new_molecule @@ -767,13 +890,13 @@ def propagate_dynamics(self, molecule, system): def test_INSTALLED_FORCEFIELDS(self): """Test INSTALLED_FORCEFIELDS contains expected force fields""" expected_force_fields = [ - 'openff-1.1.0', - 'openff-2.0.0', - 'smirnoff99Frosst-1.1.0', + "openff-1.1.0", + "openff-2.0.0", + "smirnoff99Frosst-1.1.0", ] forbidden_force_fields = [ - 'openff_unconstrained', - 'ff14sb_0.0.3', + "openff_unconstrained", + "ff14sb_0.0.3", ] for expected in expected_force_fields: @@ -786,7 +909,9 @@ def test_energies(self): """Test potential energies match between openff-toolkit and OpenMM ForceField""" # Test all supported SMIRNOFF force fields - for small_molecule_forcefield in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS: + for ( + small_molecule_forcefield + ) in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS: if "ff14sb" in small_molecule_forcefield: continue if "tip" in small_molecule_forcefield: @@ -799,10 +924,12 @@ def test_energies(self): if "openff-2.0.0-rc.1" not in small_molecule_forcefield: continue - print(f'Testing energies for {small_molecule_forcefield}...') + print(f"Testing energies for {small_molecule_forcefield}...") # Create a generator that knows about a few molecules # TODO: Should the generator also load the appropriate force field files into the ForceField object? - generator = SMIRNOFFTemplateGenerator(molecules=self.molecules, forcefield=small_molecule_forcefield) + generator = SMIRNOFFTemplateGenerator( + molecules=self.molecules, forcefield=small_molecule_forcefield + ) # Create a ForceField openmm_forcefield = openmm.app.ForceField() # Register the template generator @@ -810,30 +937,39 @@ def test_energies(self): # Parameterize some molecules for molecule in self.molecules: # Create OpenMM System using OpenMM app - openmm_system = openmm_forcefield.createSystem(molecule.to_topology().to_openmm(), removeCMMotion=False, nonbondedMethod=NoCutoff) + openmm_system = openmm_forcefield.createSystem( + molecule.to_topology().to_openmm(), + removeCMMotion=False, + nonbondedMethod=NoCutoff, + ) # Retrieve System generated by the SMIRNOFF typing engine smirnoff_system = generator.get_openmm_system(molecule) # Compare energies and forces self.compare_energies(molecule, openmm_system, smirnoff_system) - + # Run some dynamics molecule = self.propagate_dynamics(molecule, smirnoff_system) # Compare energies again self.compare_energies(molecule, openmm_system, smirnoff_system) - def test_partial_charges_are_none(self): """Test parameterizing a small molecule with `partial_charges=None` instead of zeros (happens frequently in OFFTK>=0.7.0)""" - molecule = Molecule.from_smiles('C=O') + from openff.units import unit + + molecule = Molecule.from_smiles("C=O") molecule.generate_conformers(n_conformers=1) - #molecule._partial_charges = None - assert (molecule.partial_charges is None) or np.all(molecule.partial_charges / unit.elementary_charge == 0) + # molecule._partial_charges = None + assert (molecule.partial_charges is None) or np.all( + molecule.partial_charges / unit.elementary_charge == 0 + ) # Test all supported SMIRNOFF force fields - for small_molecule_forcefield in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS: + for ( + small_molecule_forcefield + ) in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS: if "ff14sb" in small_molecule_forcefield: continue if "tip" in small_molecule_forcefield: @@ -841,17 +977,23 @@ def test_partial_charges_are_none(self): if "opc" in small_molecule_forcefield: continue - print(f'Testing energies for {small_molecule_forcefield}...') + print(f"Testing energies for {small_molecule_forcefield}...") # Create a generator that knows about a few molecules # TODO: Should the generator also load the appropriate force field files into the ForceField object? - generator = SMIRNOFFTemplateGenerator(molecules=[molecule], forcefield=small_molecule_forcefield) + generator = SMIRNOFFTemplateGenerator( + molecules=[molecule], forcefield=small_molecule_forcefield + ) # Create a ForceField openmm_forcefield = openmm.app.ForceField() # Register the template generator openmm_forcefield.registerTemplateGenerator(generator.generator) # Create OpenMM System using OpenMM app - openmm_system = openmm_forcefield.createSystem(molecule.to_topology().to_openmm(), removeCMMotion=False, nonbondedMethod=NoCutoff) - smirnoff_system = generator.get_openmm_system(molecule) + openmm_forcefield.createSystem( + molecule.to_topology().to_openmm(), + removeCMMotion=False, + nonbondedMethod=NoCutoff, + ) + generator.get_openmm_system(molecule) def test_version(self): """Test version""" @@ -860,7 +1002,7 @@ def test_version(self): for forcefield in SMIRNOFFTemplateGenerator.INSTALLED_FORCEFIELDS: generator = SMIRNOFFTemplateGenerator(forcefield=forcefield) assert generator.forcefield == forcefield - assert generator.smirnoff_filename.endswith(forcefield + '.offxml') + assert generator.smirnoff_filename.endswith(forcefield + ".offxml") assert os.path.exists(generator.smirnoff_filename) def test_bespoke_force_field(self): @@ -873,25 +1015,34 @@ def test_bespoke_force_field(self): # Create a simple molecule with one bond type ethane = Molecule.from_smiles("C") # Label ethane to get the bond type (not hard coded incase this changes in future) - bond_parameter = custom_sage.label_molecules(ethane.to_topology())[0]["Bonds"][(0, 1)] + bond_parameter = custom_sage.label_molecules(ethane.to_topology())[0]["Bonds"][ + (0, 1) + ] # Edit the bond parameter bonds = custom_sage.get_parameter_handler("Bonds") new_parameter = bonds[bond_parameter.smirks] new_parameter.length = 2 * OFFUnit.angstrom # Use the custom sage passed as string to build a template and an openmm system - generator = SMIRNOFFTemplateGenerator(molecules=ethane, forcefield=custom_sage.to_string()) + generator = SMIRNOFFTemplateGenerator( + molecules=ethane, forcefield=custom_sage.to_string() + ) # Create a ForceField openmm_forcefield = openmm.app.ForceField() # Register the template generator openmm_forcefield.registerTemplateGenerator(generator.generator) # Use OpenMM app to generate the system - openmm_system = openmm_forcefield.createSystem(ethane.to_topology().to_openmm(), removeCMMotion=False, - nonbondedMethod=NoCutoff) + openmm_system = openmm_forcefield.createSystem( + ethane.to_topology().to_openmm(), + removeCMMotion=False, + nonbondedMethod=NoCutoff, + ) # Check the bond length has been updated - forces = {force.__class__.__name__: force for force in openmm_system.getForces()} + forces = { + force.__class__.__name__: force for force in openmm_system.getForces() + } bond_force = forces["HarmonicBondForce"] for i in range(bond_force.getNumBonds()): _, _, length, _ = bond_force.getBondParameters(i) @@ -929,7 +1080,7 @@ def propagate_dynamics(self, molecule, system): timestep = 1.0 * unit.femtoseconds nsteps = 100 integrator = openmm.LangevinIntegrator(temperature, collision_rate, timestep) - platform = openmm.Platform.getPlatformByName('Reference') + platform = openmm.Platform.getPlatformByName("Reference") context = openmm.Context(system, integrator, platform) context.setPositions(ensure_quantity(molecule.conformers[0], "openmm")) integrator.step(nsteps) @@ -950,16 +1101,17 @@ def propagate_dynamics(self, molecule, system): def test_retrieve_forcefields(self): """Test a force field can be retrieved""" # Test loading model by specifying version number - generator = EspalomaTemplateGenerator(forcefield='espaloma-0.3.2') + generator = EspalomaTemplateGenerator(forcefield="espaloma-0.3.2") del generator # Test loading model from remote URL - url = 'https://github.com/choderalab/espaloma/releases/download/0.3.2/espaloma-0.3.2.pt' + url = "https://github.com/choderalab/espaloma/releases/download/0.3.2/espaloma-0.3.2.pt" generator = EspalomaTemplateGenerator(forcefield=url) del generator # Test loading model from filename with tempfile.TemporaryDirectory() as tmpdirname: import urllib - filename = os.path.join(tmpdirname, 'model.pt') + + filename = os.path.join(tmpdirname, "model.pt") urllib.request.urlretrieve(url, filename=filename) # Create a new database file generator = EspalomaTemplateGenerator(forcefield=filename) @@ -969,11 +1121,15 @@ def test_energies(self): """Test potential energies match between openff-toolkit and OpenMM ForceField""" # Test all supported SMIRNOFF force fields - for small_molecule_forcefield in EspalomaTemplateGenerator.INSTALLED_FORCEFIELDS: - print(f'Testing energies for {small_molecule_forcefield}...') + for ( + small_molecule_forcefield + ) in EspalomaTemplateGenerator.INSTALLED_FORCEFIELDS: + print(f"Testing energies for {small_molecule_forcefield}...") # Create a generator that knows about a few molecules # TODO: Should the generator also load the appropriate force field files into the ForceField object? - generator = EspalomaTemplateGenerator(molecules=self.molecules, forcefield=small_molecule_forcefield) + generator = EspalomaTemplateGenerator( + molecules=self.molecules, forcefield=small_molecule_forcefield + ) # Create a ForceField openmm_forcefield = openmm.app.ForceField() # Register the template generator @@ -981,7 +1137,11 @@ def test_energies(self): # Parameterize some molecules for molecule in self.molecules: # Create OpenMM System using OpenMM app - openmm_system = openmm_forcefield.createSystem(molecule.to_topology().to_openmm(), removeCMMotion=False, nonbondedMethod=NoCutoff) + openmm_system = openmm_forcefield.createSystem( + molecule.to_topology().to_openmm(), + removeCMMotion=False, + nonbondedMethod=NoCutoff, + ) # Retrieve System generated by Espaloma espaloma_system = generator.get_openmm_system(molecule) @@ -998,23 +1158,35 @@ def test_energies(self): def test_partial_charges_are_none(self): """Test parameterizing a small molecule with `partial_charges=None` instead of zeros (happens frequently in OFFTK>=0.7.0)""" - molecule = Molecule.from_smiles('C=O') + from openff.units import unit + + molecule = Molecule.from_smiles("C=O") molecule.generate_conformers(n_conformers=1) - #molecule._partial_charges = None - assert (molecule.partial_charges is None) or np.all(molecule.partial_charges / unit.elementary_charge == 0) + # molecule._partial_charges = None + assert (molecule.partial_charges is None) or np.all( + molecule.partial_charges / unit.elementary_charge == 0 + ) # Test all supported SMIRNOFF force fields - for small_molecule_forcefield in EspalomaTemplateGenerator.INSTALLED_FORCEFIELDS: - print(f'Testing energies for {small_molecule_forcefield}...') + for ( + small_molecule_forcefield + ) in EspalomaTemplateGenerator.INSTALLED_FORCEFIELDS: + print(f"Testing energies for {small_molecule_forcefield}...") # Create a generator that knows about a few molecules # TODO: Should the generator also load the appropriate force field files into the ForceField object? - generator = EspalomaTemplateGenerator(molecules=[molecule], forcefield=small_molecule_forcefield) + generator = EspalomaTemplateGenerator( + molecules=[molecule], forcefield=small_molecule_forcefield + ) # Create a ForceField openmm_forcefield = openmm.app.ForceField() # Register the template generator openmm_forcefield.registerTemplateGenerator(generator.generator) # Create OpenMM System using OpenMM app - openmm_system = openmm_forcefield.createSystem(molecule.to_topology().to_openmm(), removeCMMotion=False, nonbondedMethod=NoCutoff) - smirnoff_system = generator.get_openmm_system(molecule) + openmm_forcefield.createSystem( + molecule.to_topology().to_openmm(), + removeCMMotion=False, + nonbondedMethod=NoCutoff, + ) + generator.get_openmm_system(molecule) # def test_template_generator_kwargs(self): # """Test """ @@ -1028,21 +1200,28 @@ def test_keyword_arguments_default(self): """ molecule = Molecule.from_smiles("C=O") molecule.generate_conformers(n_conformers=1) - molecule.assign_partial_charges("am1bcc") # Assign partial charges with off toolkit am1bcc method - generator = EspalomaTemplateGenerator(molecules=[molecule], forcefield="espaloma-0.3.2") + molecule.assign_partial_charges( + "am1bcc" + ) # Assign partial charges with off toolkit am1bcc method + generator = EspalomaTemplateGenerator( + molecules=[molecule], forcefield="espaloma-0.3.2" + ) # Create forcefield object forcefield = ForceField() # Register the template generator forcefield.registerTemplateGenerator(generator.generator) # Create system - system = forcefield.createSystem(molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff) + system = forcefield.createSystem( + molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff + ) # Make sure passing through the EspalomaGenerator didn't change the charges assert self.charges_are_equal(system, molecule), "Expected equal charges." # Assert the reference forcefield is the default "openff_unconstrained-2.0.0" default_ref_ff = "openff_unconstrained-2.0.0" generator_ref_ff = generator._reference_forcefield - assert generator_ref_ff == default_ref_ff, f"Expected {default_ref_ff}, received {generator_ref_ff}." - + assert ( + generator_ref_ff == default_ref_ff + ), f"Expected {default_ref_ff}, received {generator_ref_ff}." def test_keyword_arguments(self): """ @@ -1059,24 +1238,34 @@ def test_keyword_arguments(self): """ molecule = Molecule.from_smiles("C=O") molecule.generate_conformers(n_conformers=1) - molecule.assign_partial_charges("am1bcc") # Assign partial charges with off toolkit am1bcc method + molecule.assign_partial_charges( + "am1bcc" + ) # Assign partial charges with off toolkit am1bcc method # Custom generator kwargs espaloma_generator_kwargs = { "reference_forcefield": "openff_unconstrained-2.1.0", "charge_method": "nn", } - generator = EspalomaTemplateGenerator(molecules=[molecule], - forcefield="espaloma-0.3.2", - template_generator_kwargs=espaloma_generator_kwargs) + generator = EspalomaTemplateGenerator( + molecules=[molecule], + forcefield="espaloma-0.3.2", + template_generator_kwargs=espaloma_generator_kwargs, + ) # Create forcefield object forcefield = ForceField() # Register the template generator forcefield.registerTemplateGenerator(generator.generator) # Create system - system = forcefield.createSystem(molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff) + system = forcefield.createSystem( + molecule.to_topology().to_openmm(), nonbondedMethod=NoCutoff + ) # Make sure passing through the EspalomaGenerator changes the charges - assert not self.charges_are_equal(system, molecule), "Expected different charges" + assert not self.charges_are_equal( + system, molecule + ), "Expected different charges" # Assert the reference forcefield is "openff_unconstrained-2.1.0" expected_ref_ff = "openff_unconstrained-2.1.0" generator_ref_ff = generator._reference_forcefield - assert generator_ref_ff == expected_ref_ff, f"Expected {expected_ref_ff}, received {generator_ref_ff}." \ No newline at end of file + assert ( + generator_ref_ff == expected_ref_ff + ), f"Expected {expected_ref_ff}, received {generator_ref_ff}." diff --git a/openmmforcefields/utils.py b/openmmforcefields/utils.py index 110f0fe2..44baccfd 100644 --- a/openmmforcefields/utils.py +++ b/openmmforcefields/utils.py @@ -1,8 +1,11 @@ import contextlib +import functools import logging +import time _logger = logging.getLogger("openmmforcefields.generators.gaff") + def get_ffxml_path(): """ Return the path where OpenMM ffxml forcefield files are stored in this package. @@ -13,9 +16,11 @@ def get_ffxml_path(): The absolute path where OpenMM ffxml forcefield files are stored in this package """ from pkg_resources import resource_filename - filename = resource_filename('openmmforcefields', 'ffxml') + + filename = resource_filename("openmmforcefields", "ffxml") return filename + def get_data_filename(relative_path): """get the full path to one of the reference files shipped for testing @@ -34,18 +39,25 @@ def get_data_filename(relative_path): """ from pkg_resources import resource_filename - fn = resource_filename('openmmforcefields', 'data/' + relative_path) + + fn = resource_filename("openmmforcefields", "data/" + relative_path) import os + if not os.path.exists(fn): - raise ValueError("sorry! %s does not exist. if you just added it, you'll have to re-install" % fn) + raise ValueError( + "sorry! %s does not exist. if you just added it, you'll have to re-install" + % fn + ) return fn + # ============================================================================= # BENCHMARKING UTILITIES # ============================================================================= + @contextlib.contextmanager def time_it(task_name): """Context manager to log execution time of a block of code. @@ -72,12 +84,15 @@ def with_timer(task_name): The name of the task that will be reported. """ + def _with_timer(func): @functools.wraps(func) def _wrapper(*args, **kwargs): with time_it(task_name): return func(*args, **kwargs) + return _wrapper + return _with_timer @@ -129,27 +144,29 @@ def reset_timing_statistics(self, benchmark_id=None): self._t1.pop(benchmark_id, None) self._completed.pop(benchmark_id, None) - def start(self, benchmark_id='default'): + def start(self, benchmark_id="default"): """Start a timer with given benchmark_id.""" import time + self._t0[benchmark_id] = time.time() - def stop(self, benchmark_id='default'): + def stop(self, benchmark_id="default"): try: t0 = self._t0[benchmark_id] except KeyError: _logger.warning(f"Can't stop timing for {benchmark_id}") else: import time + self._t1[benchmark_id] = time.time() elapsed_time = self._t1[benchmark_id] - t0 self._completed[benchmark_id] = elapsed_time return elapsed_time - def interval(self, benchmark_id='default'): + def interval(self, benchmark_id="default"): return self._completed[benchmark_id] - def partial(self, benchmark_id='default'): + def partial(self, benchmark_id="default"): """Return the elapsed time of the given benchmark so far.""" try: t0 = self._t0[benchmark_id] @@ -173,7 +190,7 @@ def report_timing(self, clear=True): """ for benchmark_id, elapsed_time in self._completed.items(): - _logger.debug(f'{benchmark_id} took {elapsed_time:8.3f}s') + _logger.debug(f"{benchmark_id} took {elapsed_time:8.3f}s") if clear is True: self.reset_timing_statistics() diff --git a/setup.cfg b/setup.cfg index 25143d4c..93b676fc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,22 +1,17 @@ -# Helper file to handle all configs - [coverage:run] -# .coveragerc to control coverage.py and pytest-cov omit = - # Omit the tests */tests/* - # Omit generated versioneer openmmforcefields/_version.py -[yapf] -# YAPF, in .style.yapf files this shows up as "[style]" header -COLUMN_LIMIT = 119 -INDENT_WIDTH = 4 -USE_TABS = False +[isort] +profile=black [flake8] -# Flake8, PyFlakes, etc max-line-length = 119 +ignore = E203,E501,W605,W503 +per-file-ignores = + openmmforcefields/__init__.py:F401 + openmmforcefields/generators/__init__.py:F401 [versioneer] # Automatic version numbering scheme