Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{bio}[foss/2021a] OpenFold v1.0.0, colossalai v0.1.8, einops v0.4.1, OpenMM 7.5.1 (incl. AlphaFold patch) w/ Python 3.9.5 + CUDA 11.3.1 #15971

Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# EasyBuild easyconfig: colossalai 0.1.8, installed as a bundle of Python packages
easyblock = 'PythonBundle'

name = 'colossalai'
version = '0.1.8'
# %(cudaver)s is a template left for EasyBuild to fill in
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://colossalai.org/'
description = "Colossal-AI: A Unified Deep Learning System for Big Model Era"

toolchain = {'name': 'foss', 'version': '2021a'}

# the PyTorch-based dependencies carry the same CUDA version suffix as this bundle
dependencies = [
    ('Python', '3.9.5'),
    ('CUDA', '11.3.1', '', True),
    ('SciPy-bundle', '2021.05'),
    ('PyTorch-Lightning', '1.5.9', versionsuffix),
    ('torchvision', '0.11.1', versionsuffix),
]

# install every extension with pip, and run `pip check` as part of the sanity check
use_pip = True
sanity_pip_check = True

# keep this order: colossalai itself is listed last, after the other extensions
exts_list = [
    ('cfgv', '3.3.1', {
        'checksums': ['f5a830efb9ce7a445376bb66ec94c638a9787422f96264c98edc6bdeed8ab736'],
    }),
    ('identify', '2.5.1', {
        'checksums': ['3d11b16f3fe19f52039fb7e39c9c884b21cb1b586988114fbe42671f03de3e82'],
    }),
    ('nodeenv', '1.6.0', {
        'checksums': ['3ef13ff90291ba2a4a7a4ff9a979b63ffdd00a464dbe04acf0ea6471517a4c2b'],
    }),
    ('pre_commit', '2.19.0', {
        'checksums': ['4233a1e38621c87d9dda9808c6606d7e7ba0e087cd56d3fe03202a01d2919615'],
    }),
    ('commonmark', '0.9.1', {
        'checksums': ['452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60'],
    }),
    ('rich', '12.4.4', {
        'checksums': ['4c586de507202505346f3e32d1363eb9ed6932f0c2f63184dea88983ff4971e2'],
    }),
    ('invoke', '1.7.1', {
        'checksums': ['7b6deaf585eee0a848205d0b8c0014b9bf6f287a8eb798818a642dff1df14b19'],
    }),
    ('fabric', '2.7.1', {
        'checksums': ['76f8fef59cf2061dbd849bbce4fe49bdd820884385004b0ca59136ac3db129e4'],
    }),
    (name, version, {
        'checksums': ['3a2cdd4dc2d8b4832fa132a0bd1102f86c38f6865d7f119018404069d35984b2'],
    }),
]

moduleclass = 'ai'
29 changes: 29 additions & 0 deletions easybuild/easyconfigs/e/einops/einops-0.4.1-GCCcore-10.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# EasyBuild easyconfig: einops 0.4.1, a single Python package installed with pip
easyblock = 'PythonPackage'

name = 'einops'
version = '0.4.1'

homepage = 'https://einops.rocks/'
description = """
Flexible and powerful tensor operations for readable and reliable code.
Supports numpy, pytorch, tensorflow, jax, and others."""

toolchain = {'name': 'GCCcore', 'version': '10.3.0'}

# SOURCE_TAR_GZ is a constant supplied by EasyBuild, not defined in this file
sources = [SOURCE_TAR_GZ]
checksums = ['65ede824fa54ce99ba969c61152f9948eb8cad08d5f0ca97c95e3804bafcce48']

builddependencies = [('binutils', '2.36.1')]

dependencies = [('Python', '3.9.5')]

# pip-based installation settings
use_pip = True
download_dep_fail = True
sanity_pip_check = True

moduleclass = 'math'
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# EasyBuild easyconfig: OpenFold, installed as a bundle of Python packages
easyblock = 'PythonBundle'

name = 'OpenFold'
version = '1.0.0'
# %(cudaver)s is a template left for EasyBuild to fill in; several of the
# dependencies declared further down reuse this suffix
versionsuffix = '-CUDA-%(cudaver)s'

homepage = 'https://github.com/aqlaboratory/openfold'
description = "A faithful PyTorch reproduction of DeepMind's AlphaFold 2"

toolchain = {'name': 'foss', 'version': '2021a'}

builddependencies = [
('CMake', '3.20.1'), # required to build ninja
migueldiascosta marked this conversation as resolved.
Show resolved Hide resolved
]

dependencies = [
('Python', '3.9.5'),
('CUDA', '11.3.1', '', True),
('SciPy-bundle', '2021.05'),
('PyYAML', '5.4.1'),
('Biopython', '1.79'),
('HH-suite', '3.3.0'),
('HMMER', '3.3.2'),
('Kalign', '3.3.1'),
('UCX-CUDA', '1.10.0', versionsuffix),
('cuDNN', '8.2.1.32', versionsuffix, True),
('NCCL', '2.10.3', versionsuffix),
('dm-tree', '0.1.6'),
('einops', '0.4.1'),
('colossalai', '0.1.8', versionsuffix),
('scikit-build', '0.11.1'),
('OpenMM', '7.5.1', '_AlphaFold'), # patched OpenMM
migueldiascosta marked this conversation as resolved.
Show resolved Hide resolved
]

use_pip = True

exts_list = [
('PDBFixer', '1.7', {
'source_urls': ['https://github.com/openmm/pdbfixer/archive/refs/tags/'],
'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}],
'checksums': ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'],
}),
('ninja', '1.10.2.3', {
'checksums': ['e1b86ad50d4e681a7dbdff05fc23bb52cb773edb90bc428efba33fa027738408'],
}),
Comment on lines +47 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need that specific version? We have Ninja-1.10.1-GCCcore-10.2.0.eb, which provides the same thing (to be used as a build dependency only, I assume?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but that provides only the ninja binary, and deepspeed requires the ninja python package...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we could enhance the (recent) Ninja easyconfigs to also install the Python bindings (I'm looking into that, PR coming soon...)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see #16025

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the ninja Python package, provided by https://github.com/scikit-build/ninja-python-distributions, is actually a shim package, a very light-weight wrapper around the ninja binary so you can declare a dependency on it in setup.py & co...
With that in mind, I think it should be OK to just strip out the requirement for ninja as long as we provide the traditional Ninja as a (build) dependency.

I'll look into this (and then close #16025 since that PR doesn't make much sense then)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, just stripping out the requirement for the ninja Python package won't be a good idea, since deepspeed really does require it (if only to check whether Ninja is available), see https://github.com/microsoft/DeepSpeed/blob/316c4a43e0802a979951ee17f735daf77ea9780f/deepspeed/env_report.py#L54-L59

So unless we can somehow make the ninja Python package point to an existing Ninja installation rather than having it install its own ninja binary, this may be a necessary evil that's hard to avoid... :-/

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I closed #16025, since that's clearly not the correct way forward.

Since the ninja Python package is also a runtime dependency for deepspeed, I don't see a better way out than the current approach being used here: install ninja as an extension in OpenFold (as opposed to trying to use the classic Ninja installation as a dependency somehow).

('hjson', '3.0.2', {
'checksums': ['2838fd7200e5839ea4516ece953f3a19892c41089f0d933ba3f68e596aacfcd5'],
}),
('py-cpuinfo', '8.0.0', {
'modulename': 'cpuinfo',
'checksums': ['5f269be0e08e33fd959de96b34cd4aeeeacac014dd8305f70eb28d06de2345c5'],
}),
# triton's version is pinned explicitly: borrowing the bundle-level `version`
# only worked because OpenFold and triton both happen to be at 1.0.0, and a
# bump of OpenFold would have silently changed the wheel name below
('triton', '1.0.0', {
    # installed from the wheel file named here (cp39 / manylinux x86_64)
    'source_tmpl': '%(name)s-%(version)s-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl',
    'checksums': ['37b8d0eb36ed7631a6f9d01bd3183f900ae7dbd9e5e40112468a3568505671dc'],
}),
('deepspeed', '0.5.9', {
'checksums': ['7c43d151b51d346a430034e77764097c4af7637217c08503291c48c37ae7d090'],
}),
('contextlib2', '21.6.0', {
'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'],
}),
('ml_collections', '0.1.0', {
'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ",
'checksums': ['59a17fcd1c140153009788517f304caaddd7a94f06690f9f0ed09987beebcf3c'],
}),
# dllogger's version is pinned explicitly instead of borrowing the bundle-level
# `version`: the two only coincide at 1.0.0, and the download URL and checksum
# below belong to this specific dllogger release
('dllogger', '1.0.0', {
    'source_urls': ['https://github.com/NVIDIA/dllogger/archive/refs/tags/'],
    # download the v<version> tag tarball but store it under a package-specific name
    'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}],
    'checksums': ['43e5e3c3acf891dfe6151f7d869f3ad2424772fe57fd8dcb0a45bad06de93bf7'],
}),
# OpenFold itself, listed last in the bundle
(name, version, {
    'source_urls': ['https://github.com/aqlaboratory/openfold/archive/refs/tags/'],
    # store the GitHub tag tarball under a package-specific name, as the other
    # GitHub-sourced extensions in this list do, instead of the generic
    # v<version>.tar.gz; the file content (and so the checksum) is unchanged
    'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}],
    'checksums': ['543cb0d36a6118a60de4b4ec2f4a49ebcc965523e5b31e9ad03425de367384a7'],
}),
]

sanity_check_paths = {
'files': ['bin/pdbfixer'],
'dirs': ['lib/python%(pyshortver)s/site-packages'],
}

sanity_check_commands = [
"pdbfixer --help",
"python -c 'import openfold'",
migueldiascosta marked this conversation as resolved.
Show resolved Hide resolved
]

sanity_pip_check = True

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# EasyBuild easyconfig: OpenMM, configured and built with CMake
easyblock = 'CMakeMake'

name = 'OpenMM'
version = '7.5.1'
# suffix marking this build as the AlphaFold-patched one (see `patches`);
# easyconfigs depending on this build have to request the suffix verbatim
versionsuffix = '_AlphaFold'
migueldiascosta marked this conversation as resolved.
Show resolved Hide resolved

homepage = 'https://openmm.org'
description = "OpenMM is a toolkit for molecular simulation."

toolchain = {'name': 'foss', 'version': '2021a'}
toolchainopts = {'opt': True}

source_urls = ['https://github.com/openmm/openmm/archive/']
sources = ['%(version)s.tar.gz']
# the AlphaFold patch is paired with the 'wrappers/python' subdirectory
patches = [('OpenMM-%(version)s_AlphaFold.patch', 'wrappers/python')]
# NOTE(review): presumably listed in sources-then-patches order - confirm
checksums = [
    'c88d6946468a2bde2619acb834f57b859b5e114a93093cf562165612e10f4ff7',
    '1b109dfff3af5c6aa70690bca14618612953c68840a7e64f679db7ca33c1aff6',
]

builddependencies = [
    ('CMake', '3.20.1'),
    ('Doxygen', '1.9.1'),
]

dependencies = [
    ('Python', '3.9.5'),
    ('SciPy-bundle', '2021.05'),
    ('SWIG', '4.0.2'),
]

separate_build_dir = True

# test target plus an ARGS value; the -E pattern names six groups of tests
# (presumably the ones to exclude - confirm against the CTest documentation).
# The trailing space is part of the original value and is kept on purpose.
runtest = (
    'test -e ARGS="-E '
    "'(Integrator)|(Thermostat)|(Barostat)|(Rpmd)|(Amoeba)|(HippoNonbondedForce)'"
    '" '
)

# export the OpenMM include/lib locations ahead of the install command
preinstallopts = (
    ' export OPENMM_INCLUDE_PATH=%(installdir)s/include && '
    ' export OPENMM_LIB_PATH=%(installdir)s/lib && '
)

# required to install the python API
installopts = ' && cd python && python setup.py build && python setup.py install --prefix=%(installdir)s'

# files that must exist after installation: the shared library (SHLIB_EXT is a
# constant supplied by EasyBuild, not defined in this file) and the Python wrapper
sanity_check_paths = {
    'files': [
        'lib/libOpenMM.%s' % SHLIB_EXT,
        'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py',
    ],
    'dirs': [],
}

sanity_check_commands = ["python -m simtk.testInstallation"]

# extra environment variables for the module, as paths relative to the install prefix
modextrapaths = {
    'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages',
    'OPENMM_INCLUDE_PATH': 'include',
    'OPENMM_LIB_PATH': 'lib',
}

moduleclass = 'bio'
45 changes: 45 additions & 0 deletions easybuild/easyconfigs/o/OpenMM/OpenMM-7.5.1_AlphaFold.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
custom patch for OpenMM to use in conjunction with AlphaFold
see https://github.com/deepmind/alphafold/blob/main/docker/openmm.patch

Index: simtk/openmm/app/topology.py
===================================================================
--- simtk.orig/openmm/app/topology.py
+++ simtk/openmm/app/topology.py
@@ -356,19 +356,35 @@
def isCyx(res):
names = [atom.name for atom in res._atoms]
return 'SG' in names and 'HG' not in names
+ # This function is used to prevent multiple di-sulfide bonds from being
+ # assigned to a given atom. This is a DeepMind modification.
+ def isDisulfideBonded(atom):
+ for b in self._bonds:
+ if (atom in b and b[0].name == 'SG' and
+ b[1].name == 'SG'):
+ return True
+
+ return False

cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)]
atomNames = [[atom.name for atom in res._atoms] for res in cyx]
for i in range(len(cyx)):
sg1 = cyx[i]._atoms[atomNames[i].index('SG')]
pos1 = positions[sg1.index]
+ candidate_distance, candidate_atom = 0.3*nanometers, None
for j in range(i):
sg2 = cyx[j]._atoms[atomNames[j].index('SG')]
pos2 = positions[sg2.index]
delta = [x-y for (x,y) in zip(pos1, pos2)]
distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2])
- if distance < 0.3*nanometers:
- self.addBond(sg1, sg2)
+ if distance < candidate_distance and not isDisulfideBonded(sg2):
+ candidate_distance = distance
+ candidate_atom = sg2
+ # Assign bond to closest pair.
+ if candidate_atom:
+ self.addBond(sg1, candidate_atom)
+
+

class Chain(object):
"""A Chain object represents a chain within a Topology."""