Skip to content

Commit

Permalink
Merge pull request #268 from VlachosGroup/core_subpackage
Browse files (browse the repository at this point in the history)
initial efforts towards building a core-only package
  • Loading branch information
JacksonBurns authored Mar 1, 2024
2 parents be07183 + 78ced29 commit 05760b1
Show file tree
Hide file tree
Showing 9 changed files with 221 additions and 312 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,14 @@ jobs:
password: ${{ secrets.PYPI_API_TOKEN }}
skip-existing: true
verbose: true

- run: rm -rf dist
- name: Build a binary wheel and a source tarball for core package
run: |
CORE_ONLY=1 python -m build --sdist --wheel --outdir dist/ .
- name: Publish distribution 📦 to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_API_TOKEN }}
skip-existing: true
verbose: true
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
global-include config.yaml *.png *.csv *.md requirements.txt
global-include config.yaml *.png *.csv *.md requirements.txt requirements_core.txt
6 changes: 5 additions & 1 deletion aimsim/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from . import ops
from . import tasks
from . import chemical_datastructures
from . import utils

try:
from . import tasks
except ImportError:
pass # aimsim_core does not include this

__version__ = "2.1.3"
86 changes: 20 additions & 66 deletions aimsim/ops/descriptor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""This module contains methods to featurize molecules."""

import numpy as np
from rdkit.Chem import rdmolops
from rdkit import Chem
Expand All @@ -10,13 +11,8 @@
from rdkit.DataStructs import cDataStructs
from aimsim.utils.ccbmlib_fingerprints import generate_fingerprints
from padelpy import from_smiles
from aimsim.utils.extras import requires_mordred
from mhfp.encoder import MHFPEncoder

try:
from mordred import Calculator, descriptors
except ImportError:
NO_MORDRED = True
from mordred import Calculator, descriptors


from ..exceptions import (
Expand Down Expand Up @@ -59,10 +55,7 @@ def to_numpy(self):
"""
if self.check_init() is False:
raise NotInitializedError(
"Descriptor value not generated. Use "
"make_fingerprint() to initialize it."
)
raise NotInitializedError("Descriptor value not generated. Use " "make_fingerprint() to initialize it.")
if not hasattr(self, "numpy_"):
self.numpy_ = np.zeros((0,), dtype=np.int8)
DataStructs.ConvertToNumpyArray(self.rdkit_, self.numpy_)
Expand All @@ -82,15 +75,9 @@ def to_rdkit(self):
"""
if self.check_init() is False:
raise NotInitializedError(
"Descriptor value not generated. Use "
"make_fingerprint() to initialize it."
)
raise NotInitializedError("Descriptor value not generated. Use " "make_fingerprint() to initialize it.")
if not hasattr(self, "rdkit_"):
raise ValueError(
"Attempting to convert arbitrary numpy array "
"to rdkit bit vector is not supported"
)
raise ValueError("Attempting to convert arbitrary numpy array " "to rdkit bit vector is not supported")
return self.rdkit_

def check_init(self):
Expand All @@ -100,10 +87,7 @@ def check_init(self):
(bool): True if object is initialized.
"""
return (
getattr(self, "numpy_", None) is not None
or getattr(self, "rdkit_", None) is not None
)
return getattr(self, "numpy_", None) is not None or getattr(self, "rdkit_", None) is not None

def _set_morgan_fingerprint(self, molecule_graph, radius, n_bits, **kwargs):
"""Set the descriptor to a morgan fingerprint.
Expand All @@ -117,15 +101,11 @@ def _set_morgan_fingerprint(self, molecule_graph, radius, n_bits, **kwargs):
as count.
"""
self.rdkit_ = AllChem.GetMorganFingerprintAsBitVect(
molecule_graph, radius, nBits=n_bits, **kwargs
)
self.rdkit_ = AllChem.GetMorganFingerprintAsBitVect(molecule_graph, radius, nBits=n_bits, **kwargs)
self.label_ = "morgan_fingerprint"
self.params_ = {"radius": radius, "n_bits": n_bits}

def _set_rdkit_topological_fingerprint(
self, molecule_graph, min_path, max_path, **kwargs
):
def _set_rdkit_topological_fingerprint(self, molecule_graph, min_path, max_path, **kwargs):
"""Set the descriptor to a topological fingerprint.
Args:
Expand All @@ -147,9 +127,7 @@ def _set_rdkit_topological_fingerprint(
f"greater than the minimum path "
f"used for fingerprint."
)
self.rdkit_ = rdmolops.RDKFingerprint(
molecule_graph, minPath=min_path, maxPath=max_path
)
self.rdkit_ = rdmolops.RDKFingerprint(molecule_graph, minPath=min_path, maxPath=max_path)
self.label_ = "topological_fingerprint"
self.params_ = {"min_path": min_path, "max_path": max_path}

Expand All @@ -167,11 +145,8 @@ def _set_padelpy_descriptor(self, molecule_graph, descriptor, **kwargs):
self.numpy_ = np.array(all_desc[descriptor])
self.label_ = descriptor
except Exception as e:
raise RuntimeError(
f"Unable to retrieve PaDELPy Descriptor '{descriptor}'. Is it a valid descriptor?"
)
raise RuntimeError(f"Unable to retrieve PaDELPy Descriptor '{descriptor}'. Is it a valid descriptor?")

@requires_mordred
def _set_mordred_descriptor(self, molecule_graph, descriptor, **kwargs):
"""Set the value of numpy_ to the descriptor as indicated by descriptor.
Expand All @@ -192,7 +167,7 @@ def _set_mordred_descriptor(self, molecule_graph, descriptor, **kwargs):
except KeyError:
raise MordredCalculatorError(
"""Mordred descriptor calculator unable to calculate descriptor \"{}\",
ensure correct name is used (https://mordred-descriptor.github.io/documentation/master/descriptors.html).""".format(
ensure correct name is used (https://jacksonburns.github.io/mordred-community/descriptors.html).""".format(
descriptor
)
)
Expand Down Expand Up @@ -286,9 +261,7 @@ def _set_minhash_fingerprint(self, molecule_graph, **kwargs):
self.label_ = "minhash_fingerprint"
self.params_ = kwargs

def make_fingerprint(
self, molecule_graph, fingerprint_type, fingerprint_params=None
):
def make_fingerprint(self, molecule_graph, fingerprint_type, fingerprint_params=None):
"""Make fingerprint of a molecule based on a graph representation.
Set the state of the descriptor to this fingerprint.
Expand Down Expand Up @@ -316,15 +289,11 @@ def make_fingerprint(
"kekulize": True,
}
minhash_params.update(fingerprint_params)
self._set_minhash_fingerprint(
molecule_graph=molecule_graph, **minhash_params
)
self._set_minhash_fingerprint(molecule_graph=molecule_graph, **minhash_params)
elif fingerprint_type == "topological_fingerprint":
topological_params = {"min_path": 1, "max_path": 7}
topological_params.update(fingerprint_params)
self._set_rdkit_topological_fingerprint(
molecule_graph=molecule_graph, **topological_params
)
self._set_rdkit_topological_fingerprint(molecule_graph=molecule_graph, **topological_params)
elif fingerprint_type == "daylight_fingerprint":
daylight_params = {
"minPath": 1,
Expand All @@ -336,25 +305,19 @@ def make_fingerprint(
"minSize": 64,
}
daylight_params.update(fingerprint_params)
self._set_daylight_fingerprint(
molecule_graph=molecule_graph, **daylight_params
)
self._set_daylight_fingerprint(molecule_graph=molecule_graph, **daylight_params)
elif fingerprint_type == "maccs_keys":
maccs_params = {}
maccs_params.update(fingerprint_params)
self._set_maccs_keys(molecule_graph=molecule_graph, **maccs_params)
elif fingerprint_type == "atom-pair_fingerprint":
atom_pair_fp_params = {}
atom_pair_fp_params.update(fingerprint_params)
self._set_atom_pair_fingerprint(
molecule_graph=molecule_graph, **atom_pair_fp_params
)
self._set_atom_pair_fingerprint(molecule_graph=molecule_graph, **atom_pair_fp_params)
elif fingerprint_type == "torsion_fingerprint":
torsion_params = {}
torsion_params.update(fingerprint_params)
self._set_torsion_fingerprint(
molecule_graph=molecule_graph, **torsion_params
)
self._set_torsion_fingerprint(molecule_graph=molecule_graph, **torsion_params)
elif fingerprint_type.split(":")[0] == "mordred":
mordred_params = {}
self._set_mordred_descriptor(
Expand Down Expand Up @@ -418,18 +381,11 @@ def get_folded_fprint(self, fold_to_length):
raise ValueError("Can only fold fingerprints")
fingerprint = self.to_numpy()
if len(fingerprint) < fold_to_length:
raise InvalidConfigurationError(
f"Cannot fold fingerprint of "
f"length {len(fingerprint)}to a "
f"higher length {fold_to_length}"
)
raise InvalidConfigurationError(f"Cannot fold fingerprint of " f"length {len(fingerprint)}to a " f"higher length {fold_to_length}")
n_folds = np.log2(len(fingerprint) / fold_to_length)
if n_folds - int(n_folds) > 0.0:
raise InvalidConfigurationError(
f"Fingerprint length "
f"{len(fingerprint)} not "
f"a 2-multiple of required "
f"folded length {fold_to_length}"
f"Fingerprint length " f"{len(fingerprint)} not " f"a 2-multiple of required " f"folded length {fold_to_length}"
)
for _ in range(int(n_folds)):
mid_point = int(len(fingerprint) / 2)
Expand Down Expand Up @@ -484,9 +440,7 @@ def fold_to_equal_length(fingerprint1, fingerprint2):
fprint2_arr,
)
else:
return fprint1_arr, fingerprint2.get_folded_fprint(
fold_to_length=len(fprint1_arr)
)
return fprint1_arr, fingerprint2.get_folded_fprint(fold_to_length=len(fprint1_arr))

@staticmethod
def get_supported_fprints():
Expand Down
17 changes: 0 additions & 17 deletions aimsim/utils/extras.py

This file was deleted.

Loading

0 comments on commit 05760b1

Please sign in to comment.