From cbba4c3973a1ba169cf66f86129a371a23ebc661 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 13:24:27 -0600 Subject: [PATCH 001/191] Created new genutils.attrs submodule for inspecting attributes --- polymerist/genutils/attrs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 polymerist/genutils/attrs.py diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py new file mode 100644 index 0000000..7603c60 --- /dev/null +++ b/polymerist/genutils/attrs.py @@ -0,0 +1,18 @@ +'''For dynamically inspecting and modifying attributes of Python objects''' + +from typing import Any, Optional +import re + + +def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: + '''Takes an object and returns a dict of the return values of all argument-free methods of the objects + Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' + getable_dict = {} + for attr_name in dir(obj): + if re.search(getter_str, attr_name): + try: + attr_key = attr_name if (repl_str is None) else re.sub(getter_str, repl_str, attr_name) + getable_dict[attr_key] = getattr(obj, attr_name)() + except (TypeError, Exception): # TODO : find way to selectively intercept the Boost C++ wrapper ArgumentError + pass + return getable_dict \ No newline at end of file From a38b3daacf2837ef095f647aab8b03e65992a90c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 13:24:59 -0600 Subject: [PATCH 002/191] Moved compiled_simple_gettable_attrs() from genutils.importutils to genutils.attrs; updated dependent imports --- polymerist/genutils/importutils.py | 18 +----------------- polymerist/mdtools/openmmtools/description.py | 2 +- polymerist/rdutils/rdprops.py | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/polymerist/genutils/importutils.py 
b/polymerist/genutils/importutils.py index e6988e1..0ac3642 100644 --- a/polymerist/genutils/importutils.py +++ b/polymerist/genutils/importutils.py @@ -4,9 +4,8 @@ LOGGER = logging.getLogger(__name__) from types import ModuleType -from typing import Any, Generator, Iterable, Optional +from typing import Generator, Iterable, Optional -import re import pkgutil import importlib @@ -14,21 +13,6 @@ from itertools import chain -# ATTRIBUTE GETTING AND SETTING -def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: - '''Takes an object and returns a dict of the return values of all argument-free methods of the objects - Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' - getable_dict = {} - for attr_name in dir(obj): - if re.search(getter_str, attr_name): - try: - attr_key = attr_name if (repl_str is None) else re.sub(getter_str, repl_str, attr_name) - getable_dict[attr_key] = getattr(obj, attr_name)() - except (TypeError, Exception): # TODO : find way to selectively intercept the Boost C++ wrapper ArgumentError - pass - return getable_dict - - # FILETREE CHARACTERS _TREE_WIDTH : int = 4 assert(_TREE_WIDTH > 0) diff --git a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index 3f3d048..1f1cd94 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -3,7 +3,7 @@ from typing import Any, Union from openmm import NonbondedForce, System -from ...genutils.importutils import compile_simple_getable_attrs +from ...genutils.attrs import compile_simple_getable_attrs from ...genutils.textual.prettyprint import dict_to_indented_str diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index 1d944a4..13d7631 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ 
-9,7 +9,7 @@ from .labeling.bijection import bijective_atom_id_iter from ..genutils.decorators.functional import optional_in_place from ..genutils.typetools.categorical import _union_member_factory -from ..genutils.importutils import compile_simple_getable_attrs +from ..genutils.attrs import compile_simple_getable_attrs # RDKit-specific generics and type aliases From 83054f69546db5bb991167129c5191764dd55a77 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 18:03:30 -0600 Subject: [PATCH 003/191] Added return type annotations to class decorators --- polymerist/genutils/decorators/classmod.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index f663106..786880a 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -1,10 +1,10 @@ '''Decorators for modifying classes''' -from typing import Callable, Iterable, Optional, TypeVar +from typing import Callable, Iterable, Optional, TypeVar, Union C = TypeVar('C') -def generate_repr(cls : Optional[C]=None, disp_attrs : Optional[Iterable[str]]=None, lookup_attr : Optional[str]=None): +def generate_repr(cls : Optional[C]=None, disp_attrs : Optional[Iterable[str]]=None, lookup_attr : Optional[str]=None) -> Union[C, Callable[[C], C]]: ''' Class decorator for auto-generating __repr__ methods @@ -33,7 +33,7 @@ def _repr_generic(self) -> str: return class_decorator return class_decorator(cls) # return literal class decorator call -def register_subclasses(cls : Optional[C]=None, key_attr : str='__name__', reg_attr : str='subclass_registry') -> Callable[[C], C]: +def register_subclasses(cls : Optional[C]=None, key_attr : str='__name__', reg_attr : str='subclass_registry') -> Union[C, Callable[[C], C]]: ''' Parametric class decorator for automatically generating a registry of subclasses of a target class Binds registry to the "registry" class property in 
the target class From 3fa94c735d01c7a68c0dbce809a4dd4c72240d25 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 18:55:31 -0600 Subject: [PATCH 004/191] Wrote decorator which dynamically registers abstract class attributes --- polymerist/genutils/decorators/classmod.py | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index 786880a..1ddc020 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -57,3 +57,29 @@ def _registry(cls : C) -> dict[str, C]: if cls is None: # null case (i.e. call without parens), return factory call return class_decorator return class_decorator(cls) # return literal class decorator call + +# NOTE: "klass" is needed to distinguish between the class modified by this decorator and the classmethod arg when calling super() +# "klass" here is the parent, while "cls" is the child +def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: + '''Register a list of string attribute names as abstract class attributes, + which MUST be implemented by child classes of the wrapped class''' + def class_decorator(klass : C) -> C: + '''The actual (argument-free) class decorator''' + def __wrapped_init_subclass__(cls : C, **kwargs) -> None: + '''Wrapper for subclass definition which actually enforces that all named attributes are set''' + for attr_name in attr_names: + passed_attr_value = kwargs.pop(attr_name, NotImplemented) # want this removed from kwargs before passing to super, regardless of whether already set in child + attr_val_on_child = getattr(cls, attr_name, NotImplemented) # check if this has been set in the child in code + + if attr_val_on_child is NotImplemented: # if the value has not been set in code... + if passed_attr_value is not NotImplemented: # ...fall back to value passed into class definition, if it exists... 
+ attr_val_on_child = passed_attr_value + else: # otherwise, fail and raise Exception + raise TypeError(f"Can't instantiate abstract class {cls.__name__} with abstract class property '{attr_name}' undefined") + + super(klass, cls).__init_subclass__(**kwargs) # this should fail if extraneous named args are passed + + klass.__init_subclass__ = classmethod(__wrapped_init_subclass__) + return klass + + return class_decorator # no need for application check here, since the parameterized decorator doesn't take a class to be modified \ No newline at end of file From 357fb91d3fb7e2d04d77ee2e91acc16e3bd06bd2 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 20:25:55 -0600 Subject: [PATCH 005/191] Fixed bug where class attribute was not actually being set in register_abstract_class_attrs --- polymerist/genutils/decorators/classmod.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index 1ddc020..592f469 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -60,7 +60,7 @@ def _registry(cls : C) -> dict[str, C]: # NOTE: "klass" is needed to distinguish between the class modified by this decorator and the classmethod arg when calling super() # "klass" here is the parent, while "cls" is the child -def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: +def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: # TODO: add mechanism for typehinting '''Register a list of string attribute names as abstract class attributes, which MUST be implemented by child classes of the wrapped class''' def class_decorator(klass : C) -> C: @@ -73,7 +73,7 @@ def __wrapped_init_subclass__(cls : C, **kwargs) -> None: if attr_val_on_child is NotImplemented: # if the value has not been set in code... 
if passed_attr_value is not NotImplemented: # ...fall back to value passed into class definition, if it exists... - attr_val_on_child = passed_attr_value + setattr(cls, attr_name, passed_attr_value) else: # otherwise, fail and raise Exception raise TypeError(f"Can't instantiate abstract class {cls.__name__} with abstract class property '{attr_name}' undefined") From 23c94ef0369b948189c16aa5448464ed19f5db1f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 20:26:14 -0600 Subject: [PATCH 006/191] Created new genutils submodule for generic tree-related functionality --- polymerist/genutils/treetools.py | 108 +++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 polymerist/genutils/treetools.py diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py new file mode 100644 index 0000000..0b96f95 --- /dev/null +++ b/polymerist/genutils/treetools.py @@ -0,0 +1,108 @@ +'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' + +from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar +from abc import ABC, abstractmethod + +Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools +T = TypeVar('T') + +from anytree.node import Node +from anytree.exporter import DictExporter + +from .decorators.classmod import register_abstract_class_attrs + + +def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually + '''Produce a simplified tree for performing tests''' + root = Node('f') + b = Node('b', foo='bb', parent=root) + a = Node('a', foo='aa', parent=b) + d = Node('d', foo='dd', parent=b) + c = Node('c', foo='cc', parent=d) + e = Node('e', foo='ee', parent=d) + g = Node('g', foo='gg', parent=root) + i = Node('i', foo='ii', parent=g) + h = Node('h', foo='hh', parent=i) + + return root + + +# TREE COPYING +def copy_node_isolated(node : Node) -> Node: + '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' + node_attrs = { + attr_name : attr + for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + } + return Node(**node_attrs) + +# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree +def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: + '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' + if stop is None: + stop = lambda node : False + + node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes + assert(node_copy.children == tuple()) + if node.children is not None: + for child in node.children: + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + + return node_copy + + +# INTERFACES FOR BUILDING TREES FROM OTHER CLASSES +@register_abstract_class_attrs('FROMTYPE') +class AbstractNodeCorrespondence(ABC, Generic[T]): # in concrete implementations, the type of NODETYPE should match T + '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' + @abstractmethod + def name(self, obj : T) -> str: + '''Define how to obtain a string name''' + pass + + @abstractmethod + def has_children(self, obj : T) -> bool: + '''Define how to check if an object can produce children in the first place before attempting to do so''' + pass + + @abstractmethod + def children(self, obj : T) -> Optional[Iterable[T]]: + '''Define how to obtain node children from an instance + Should return NoneType if the instance is "leaf-like"''' + pass + +def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None) -> Callable[[T, Optional[int]], Node]: + '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist + if class_alias is None: # an alternative name to use when describing the tree creation for this class + class_alias = node_corresp.FROMTYPE.__name__ + + if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type + obj_attr_name = class_alias + + def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) -> Node: + # NOTE: 
deliberately omitting docstring here, as it will be built procedurally after defining this function + node = Node(name=node_corresp.name(obj)) + setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference + + if node_corresp.has_children(obj) and ( # recursively add subnodes IFF + (max_depth is None) # 1) no depth limit is set, or + or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet + ): + for child_obj in node_corresp.children(obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, _curr_depth=_curr_depth+1) + sub_node.parent = node + + return node + + # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) + compile_tree.__doc__ = f''' + Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object + + Any sub-{class_alias} encountered will be expanded into its own tree, + up to the specified maximum depth, or until exhaustion if max_depth=None + ''' + + return compile_tree \ No newline at end of file From 30df13a64d350e7b66dc65534bac108b1a259b10 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 21:01:31 -0600 Subject: [PATCH 007/191] Added option to filter (via "stop" Callable) in compile_tree() --- polymerist/genutils/treetools.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py index 0b96f95..6fe4efa 100644 --- a/polymerist/genutils/treetools.py +++ b/polymerist/genutils/treetools.py @@ -4,6 +4,8 @@ from abc import ABC, abstractmethod Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools +NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature) + T = TypeVar('T') from anytree.node import Node @@ -40,7 +42,7 @@ def copy_node_isolated(node : Node) -> Node: def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: 
'''Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches''' if stop is None: - stop = lambda node : False + stop = NULL_FILTER node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes assert(node_copy.children == tuple()) @@ -74,7 +76,12 @@ def children(self, obj : T) -> Optional[Iterable[T]]: Should return NoneType if the instance is "leaf-like"''' pass -def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None) -> Callable[[T, Optional[int]], Node]: +def compile_tree_factory( + node_corresp : AbstractNodeCorrespondence[T], + class_alias : Optional[str]=None, + obj_attr_name : Optional[str]=None, + + ) -> Callable[[T, Optional[int]], Node]: '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist if class_alias is None: # an alternative name to use when describing the tree creation for this class class_alias = node_corresp.FROMTYPE.__name__ @@ -82,8 +89,12 @@ def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_ali if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) -> Node: + + def compile_tree(obj : T, max_depth : Optional[int]=None, stop : Optional[Filter]=None, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function + if stop is None: # to blacklist certain branches from being formed + stop = NULL_FILTER + node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference @@ -92,8 +103,9 @@ def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) or (_curr_depth < 
max_depth) # 2) a limit IS set, but hasn't been reached yet ): for child_obj in node_corresp.children(obj): - sub_node = compile_tree(child_obj, max_depth=max_depth, _curr_depth=_curr_depth+1) - sub_node.parent = node + if not stop(child_obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, stop=stop, _curr_depth=_curr_depth+1) + sub_node.parent = node return node From edd5e93abf00c3c09260378b74f22155fea4b0c0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 13:22:20 -0600 Subject: [PATCH 008/191] Broke treetools into component submodules --- polymerist/genutils/treetools.py | 120 -------------------- polymerist/genutils/treetools/__init__.py | 7 ++ polymerist/genutils/treetools/test_trees.py | 17 +++ polymerist/genutils/treetools/treecopy.py | 34 ++++++ polymerist/genutils/treetools/treeinter.py | 70 ++++++++++++ 5 files changed, 128 insertions(+), 120 deletions(-) delete mode 100644 polymerist/genutils/treetools.py create mode 100644 polymerist/genutils/treetools/__init__.py create mode 100644 polymerist/genutils/treetools/test_trees.py create mode 100644 polymerist/genutils/treetools/treecopy.py create mode 100644 polymerist/genutils/treetools/treeinter.py diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py deleted file mode 100644 index 6fe4efa..0000000 --- a/polymerist/genutils/treetools.py +++ /dev/null @@ -1,120 +0,0 @@ -'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' - -from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar -from abc import ABC, abstractmethod - -Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature) - -T = TypeVar('T') - -from anytree.node import Node -from anytree.exporter import DictExporter - -from .decorators.classmod import register_abstract_class_attrs - - -def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually - '''Produce a simplified tree for performing tests''' - root = Node('f') - b = Node('b', foo='bb', parent=root) - a = Node('a', foo='aa', parent=b) - d = Node('d', foo='dd', parent=b) - c = Node('c', foo='cc', parent=d) - e = Node('e', foo='ee', parent=d) - g = Node('g', foo='gg', parent=root) - i = Node('i', foo='ii', parent=g) - h = Node('h', foo='hh', parent=i) - - return root - - -# TREE COPYING -def copy_node_isolated(node : Node) -> Node: - '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' - node_attrs = { - attr_name : attr - for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children - } - return Node(**node_attrs) - -# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: - '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - if stop is None: - stop = NULL_FILTER - - node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes - assert(node_copy.children == tuple()) - if node.children is not None: - for child in node.children: - if stop(child): - continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion - child_copy.parent = node_copy - - return node_copy - - -# INTERFACES FOR BUILDING TREES FROM OTHER CLASSES -@register_abstract_class_attrs('FROMTYPE') -class AbstractNodeCorrespondence(ABC, Generic[T]): # in concrete implementations, the type of NODETYPE should match T - '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' - @abstractmethod - def name(self, obj : T) -> str: - '''Define how to obtain a string name''' - pass - - @abstractmethod - def has_children(self, obj : T) -> bool: - '''Define how to check if an object can produce children in the first place before attempting to do so''' - pass - - @abstractmethod - def children(self, obj : T) -> Optional[Iterable[T]]: - '''Define how to obtain node children from an instance - Should return NoneType if the instance is "leaf-like"''' - pass - -def compile_tree_factory( - node_corresp : AbstractNodeCorrespondence[T], - class_alias : Optional[str]=None, - obj_attr_name : Optional[str]=None, - - ) -> Callable[[T, Optional[int]], Node]: - '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist - if class_alias is None: # an alternative name to use when describing the tree creation for this class - class_alias = node_corresp.FROMTYPE.__name__ - - if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type - obj_attr_name = class_alias - - - def compile_tree(obj : T, max_depth : Optional[int]=None, stop : Optional[Filter]=None, 
_curr_depth : int=0) -> Node: - # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function - if stop is None: # to blacklist certain branches from being formed - stop = NULL_FILTER - - node = Node(name=node_corresp.name(obj)) - setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference - - if node_corresp.has_children(obj) and ( # recursively add subnodes IFF - (max_depth is None) # 1) no depth limit is set, or - or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet - ): - for child_obj in node_corresp.children(obj): - if not stop(child_obj): - sub_node = compile_tree(child_obj, max_depth=max_depth, stop=stop, _curr_depth=_curr_depth+1) - sub_node.parent = node - - return node - - # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) - compile_tree.__doc__ = f''' - Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object - - Any sub-{class_alias} encountered will be expanded into its own tree, - up to the specified maximum depth, or until exhaustion if max_depth=None - ''' - - return compile_tree \ No newline at end of file diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/treetools/__init__.py new file mode 100644 index 0000000..de6724f --- /dev/null +++ b/polymerist/genutils/treetools/__init__.py @@ -0,0 +1,7 @@ +'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' + +from typing import Any, Callable, TypeVar + +T = TypeVar('T') +Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools +NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) diff --git a/polymerist/genutils/treetools/test_trees.py b/polymerist/genutils/treetools/test_trees.py new file mode 100644 index 0000000..1eccda2 --- /dev/null +++ b/polymerist/genutils/treetools/test_trees.py @@ -0,0 +1,17 @@ +'''Unit tests for trees''' + +from anytree.node import Node + +def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually + '''Produce a simplified tree for performing tests''' + root = Node('f') + b = Node('b', foo='bb', parent=root) + a = Node('a', foo='aa', parent=b) + d = Node('d', foo='dd', parent=b) + c = Node('c', foo='cc', parent=d) + e = Node('e', foo='ee', parent=d) + g = Node('g', foo='gg', parent=root) + i = Node('i', foo='ii', parent=g) + h = Node('h', foo='hh', parent=i) + + return root \ No newline at end of file diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py new file mode 100644 index 0000000..2d96717 --- /dev/null +++ b/polymerist/genutils/treetools/treecopy.py @@ -0,0 +1,34 @@ +'''Tools for copying parts and wholes of trees, at various levels of resolution''' + +from typing import Optional + +from anytree.node import Node +from anytree.exporter import DictExporter + +from . 
import Filter, NULL_FILTER + + +def copy_node_isolated(node : Node) -> Node: + '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' + node_attrs = { + attr_name : attr + for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + } + return Node(**node_attrs) + +# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree +def copy_tree(node : Node, stop : Optional[Filter[Node]]=None) -> Node: + '''Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches''' + if stop is None: + stop = NULL_FILTER + + node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes + assert(node_copy.children == tuple()) + if node.children is not None: + for child in node.children: + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + + return node_copy \ No newline at end of file diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py new file mode 100644 index 0000000..930a853 --- /dev/null +++ b/polymerist/genutils/treetools/treeinter.py @@ -0,0 +1,70 @@ +'''Tools for interfacing and representing arbitrary external classes with tree-like data structures''' + +from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar +from abc import ABC, abstractmethod + +T = TypeVar('T') + +from anytree.node import Node +from anytree.exporter import DictExporter + +from ..decorators.classmod import register_abstract_class_attrs +from . 
import Filter, NULL_FILTER + + +@register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE +class AbstractNodeCorrespondence(ABC, Generic[T]): + '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' + @abstractmethod + def name(self, obj : T) -> str: + '''Define how to obtain a string name''' + pass + + @abstractmethod + def has_children(self, obj : T) -> bool: + '''Define how to check if an object can produce children in the first place before attempting to do so''' + pass + + @abstractmethod + def children(self, obj : T) -> Optional[Iterable[T]]: + '''Define how to obtain node children from an instance + Should return NoneType if the instance is "leaf-like"''' + pass + +def compile_tree_factory( + node_corresp : AbstractNodeCorrespondence[T], + class_alias : Optional[str]=None, + obj_attr_name : Optional[str]=None, + ) -> Callable[[T, Optional[int]], Node]: + '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist + if class_alias is None: # an alternative name to use when describing the tree creation for this class + class_alias = node_corresp.FROMTYPE.__name__ + + if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type + obj_attr_name = class_alias + + def compile_tree(obj : T, max_depth : Optional[int]=None, exclude : Optional[Filter[T]]=NULL_FILTER, _curr_depth : int=0) -> Node: + # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function + node = Node(name=node_corresp.name(obj)) + setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference + + if node_corresp.has_children(obj) and ( # recursively add subnodes IFF + (max_depth is None) # 1) no depth limit is set, or + or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet + ): + for 
child_obj in node_corresp.children(obj): + if not exclude(child_obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, exclude=exclude, _curr_depth=_curr_depth+1) + sub_node.parent = node + + return node + + # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) + compile_tree.__doc__ = f''' + Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object + + Any sub-{class_alias} encountered will be expanded into its own tree, + up to the specified maximum depth, or until exhaustion if max_depth=None + ''' + + return compile_tree \ No newline at end of file From 7338e7fce003e6c467671e9a7d919826c526a137 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 14:41:52 -0600 Subject: [PATCH 009/191] Moved filter typehints to their own dedicated module --- polymerist/genutils/filters.py | 8 ++++++++ polymerist/genutils/treetools/__init__.py | 8 +------- polymerist/genutils/treetools/treecopy.py | 13 +++++-------- polymerist/genutils/treetools/treeinter.py | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) create mode 100644 polymerist/genutils/filters.py diff --git a/polymerist/genutils/filters.py b/polymerist/genutils/filters.py new file mode 100644 index 0000000..bb2eb80 --- /dev/null +++ b/polymerist/genutils/filters.py @@ -0,0 +1,8 @@ +'''Typehinting and generic implementations of filter (indicator) functions''' + +from typing import Callable, TypeVar + +T = TypeVar('T') +Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools + +NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) \ No newline at end of file diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/treetools/__init__.py index de6724f..b555128 100644 --- a/polymerist/genutils/treetools/__init__.py +++ b/polymerist/genutils/treetools/__init__.py @@ -1,7 +1 @@ -'''Generic functionality for tree-like data 
structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' - -from typing import Any, Callable, TypeVar - -T = TypeVar('T') -Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) +'''Generic functionality for tree-like data structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' \ No newline at end of file diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 2d96717..747790e 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -5,25 +5,22 @@ from anytree.node import Node from anytree.exporter import DictExporter -from . import Filter, NULL_FILTER +from ..filters import Filter, NULL_FILTER -def copy_node_isolated(node : Node) -> Node: +def copy_node_attrs(node : Node) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children } + # assert(node_copy.children == tuple()) return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=None) -> Node: +def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - if stop is None: - stop = NULL_FILTER - - node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes - assert(node_copy.children == tuple()) + node_copy = copy_node_attrs(node) # make a read-only copy of JUST the current node's attributes if node.children is not None: for child in node.children: if stop(child): diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py index 930a853..24966d0 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ b/polymerist/genutils/treetools/treeinter.py @@ -9,7 +9,7 @@ from anytree.exporter import DictExporter from ..decorators.classmod import register_abstract_class_attrs -from . import Filter, NULL_FILTER +from ..filters import Filter, NULL_FILTER @register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE @@ -43,7 +43,7 @@ def compile_tree_factory( if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : T, max_depth : Optional[int]=None, exclude : Optional[Filter[T]]=NULL_FILTER, _curr_depth : int=0) -> Node: + def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Optional[Filter[node_corresp.FROMTYPE]]=NULL_FILTER, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference From aa30f4ee793cecd3ccf8bdd92bfbeddbcd0a1622 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 15:19:30 -0600 Subject: [PATCH 010/191] Added option to filter down node attributes when copying nodes/trees --- polymerist/genutils/treetools/treecopy.py | 20 
++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 747790e..132ee2a 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -8,24 +8,24 @@ from ..filters import Filter, NULL_FILTER -def copy_node_attrs(node : Node) -> Node: +def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + if attr_filter(attr_name) } # assert(node_copy.children == tuple()) return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER) -> Node: +def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER, attr_filter : Filter[str]=NULL_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - node_copy = copy_node_attrs(node) # make a read-only copy of JUST the current node's attributes - if node.children is not None: - for child in node.children: - if stop(child): - continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion - child_copy.parent = node_copy - + node_copy = copy_node_attrs(node, attr_filter=attr_filter) # make a read-only copy of JUST the current node's attributes + for child in node.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + return node_copy \ No newline at end of file From c46bf5cc5f53fcaa58ef9a44542f458bf9f330e1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 15:20:51 -0600 Subject: [PATCH 011/191] Renamed compiled_simple_getable_attrs() to compile_argfree_getable_attrs() to make intent clearer --- polymerist/genutils/attrs.py | 2 +- polymerist/mdtools/openmmtools/description.py | 4 ++-- polymerist/rdutils/rdprops.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index 7603c60..febcf08 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -4,7 +4,7 @@ import re -def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: +def compile_argfree_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: '''Takes an object and returns a dict of the return values of all argument-free methods of the objects Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' getable_dict = {} diff --git 
a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index 1f1cd94..b488fe9 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -3,7 +3,7 @@ from typing import Any, Union from openmm import NonbondedForce, System -from ...genutils.attrs import compile_simple_getable_attrs +from ...genutils.attrs import compile_argfree_getable_attrs from ...genutils.textual.prettyprint import dict_to_indented_str @@ -30,7 +30,7 @@ def describe_forces(ommsys : System, as_str : bool=False) -> Union[str, dict[str '''Provides a dictionary (keyed by force names) which summarizes the parameters of each Force in an OpenMM system''' force_desc_dict = {} for force in ommsys.getForces(): - force_attrs = compile_simple_getable_attrs(force, getter_str='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name + force_attrs = compile_argfree_getable_attrs(force, getter_str='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name force_attrs['Type'] = type(force).__name__ if (nonbond_id := force_attrs.get(NONBOND_METHOD_KEY)) is not None: diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index 13d7631..ed209a3 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ -9,7 +9,7 @@ from .labeling.bijection import bijective_atom_id_iter from ..genutils.decorators.functional import optional_in_place from ..genutils.typetools.categorical import _union_member_factory -from ..genutils.attrs import compile_simple_getable_attrs +from ..genutils.attrs import compile_argfree_getable_attrs # RDKit-specific generics and type aliases @@ -65,7 +65,7 @@ # PROPERTY INSPECTION FUNCTIONS def detailed_rdobj_info(rdobj : RDObj) -> dict[str, Any]: '''Extract all get-able info about a particular RDKit atom. Does NOT include any non-default Prop values (e.g. 
atomMapNumber)''' - return compile_simple_getable_attrs(rdobj, getter_str='Get', repl_str='') + return compile_argfree_getable_attrs(rdobj, getter_str='Get', repl_str='') def atom_ids_with_prop(rdmol : Mol, prop_name : str) -> list[int]: '''Returns list of atom IDs of atom which have a particular property assigned''' From d0af3d3ffe69ec15dc1d0ce6a10a6046861fdde1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 16:06:11 -0600 Subject: [PATCH 012/191] Added option to filter attributes when copying a Node or tree --- polymerist/genutils/treetools/treecopy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 132ee2a..eb4ed9f 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -5,10 +5,10 @@ from anytree.node import Node from anytree.exporter import DictExporter -from ..filters import Filter, NULL_FILTER +from ..filters import Filter, ALWAYS_TRUE_FILTER, ALWAYS_FALSE_FILTER -def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: +def copy_node_attrs(node : Node, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr @@ -19,13 +19,13 @@ def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER, attr_filter : Filter[str]=NULL_FILTER) -> Node: +def copy_tree(node : Node, stop : Filter[Node]=ALWAYS_FALSE_FILTER, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' node_copy = copy_node_attrs(node, attr_filter=attr_filter) # make a read-only copy of JUST the current node's attributes for child in node.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple if stop(child): continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy = copy_tree(child, stop=stop, attr_filter=attr_filter) # recursively copy children until stop criterion child_copy.parent = node_copy return node_copy \ No newline at end of file From dd03eefd45a0219a3a787e5221b32c61009f9d2c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 16:06:43 -0600 Subject: [PATCH 013/191] Added explicit "ALWAYS_TRUE" and "ALWAYS_FALSE" filters to cover alternate use-cases --- polymerist/genutils/filters.py | 8 +++++++- polymerist/genutils/treetools/treeinter.py | 9 ++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/filters.py b/polymerist/genutils/filters.py index bb2eb80..18ceeed 100644 --- a/polymerist/genutils/filters.py +++ b/polymerist/genutils/filters.py @@ -5,4 +5,10 @@ T = TypeVar('T') Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) \ No newline at end of file +# "TRIVIAL" filters to use as defaults when the Filter call signature is required +ALWAYS_TRUE_FILTER : Filter[T] = lambda inp : True +ALWAYS_FALSE_FILTER : Filter[T] = lambda inp : False + +# aliases for trivial filters for many use-cases +NEVER_FALSE_FILTER = MARK_ALL_FILTER = FLAG_ALL_FILTER = ALWAYS_TRUE_FILTER +NEVER_TRUE_FILTER = MARK_NONE_FILTER = FLAG_NONE_FILTER = ALWAYS_FALSE_FILTER \ No newline at end of file diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py index 
24966d0..9db3b89 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ b/polymerist/genutils/treetools/treeinter.py @@ -3,13 +3,12 @@ from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar from abc import ABC, abstractmethod -T = TypeVar('T') - from anytree.node import Node -from anytree.exporter import DictExporter from ..decorators.classmod import register_abstract_class_attrs -from ..filters import Filter, NULL_FILTER +from ..filters import Filter, ALWAYS_FALSE_FILTER + +T = TypeVar('T') @register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE @@ -43,7 +42,7 @@ def compile_tree_factory( if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Optional[Filter[node_corresp.FROMTYPE]]=NULL_FILTER, _curr_depth : int=0) -> Node: + def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Filter[node_corresp.FROMTYPE]=ALWAYS_FALSE_FILTER, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference From de46645a5eefa42d89ebd9146dac3c39026aeb6f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 13:24:27 -0600 Subject: [PATCH 014/191] Created new genutils.attrs submodule for inspecting attributes --- polymerist/genutils/attrs.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 polymerist/genutils/attrs.py diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py new file mode 100644 index 0000000..7603c60 --- /dev/null +++ b/polymerist/genutils/attrs.py @@ -0,0 +1,18 @@ +'''For dynamically 
inspecting and modifying attributes of Python objects''' + +from typing import Any, Optional +import re + + +def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: + '''Takes an object and returns a dict of the return values of all argument-free methods of the objects + Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' + getable_dict = {} + for attr_name in dir(obj): + if re.search(getter_str, attr_name): + try: + attr_key = attr_name if (repl_str is None) else re.sub(getter_str, repl_str, attr_name) + getable_dict[attr_key] = getattr(obj, attr_name)() + except (TypeError, Exception): # TODO : find way to selectively intercept the Boost C++ wrapper ArgumentError + pass + return getable_dict \ No newline at end of file From 9ffbbdd4d2e99688a0d7d9b488df687be13bccbb Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 13:24:59 -0600 Subject: [PATCH 015/191] Moved compiled_simple_gettable_attrs() from genutils.importutils to genutils.attrs; updated dependent imports --- polymerist/genutils/importutils.py | 18 +----------------- polymerist/mdtools/openmmtools/description.py | 2 +- polymerist/rdutils/rdprops.py | 2 +- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/polymerist/genutils/importutils.py b/polymerist/genutils/importutils.py index e6988e1..0ac3642 100644 --- a/polymerist/genutils/importutils.py +++ b/polymerist/genutils/importutils.py @@ -4,9 +4,8 @@ LOGGER = logging.getLogger(__name__) from types import ModuleType -from typing import Any, Generator, Iterable, Optional +from typing import Generator, Iterable, Optional -import re import pkgutil import importlib @@ -14,21 +13,6 @@ from itertools import chain -# ATTRIBUTE GETTING AND SETTING -def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: - 
'''Takes an object and returns a dict of the return values of all argument-free methods of the objects - Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' - getable_dict = {} - for attr_name in dir(obj): - if re.search(getter_str, attr_name): - try: - attr_key = attr_name if (repl_str is None) else re.sub(getter_str, repl_str, attr_name) - getable_dict[attr_key] = getattr(obj, attr_name)() - except (TypeError, Exception): # TODO : find way to selectively intercept the Boost C++ wrapper ArgumentError - pass - return getable_dict - - # FILETREE CHARACTERS _TREE_WIDTH : int = 4 assert(_TREE_WIDTH > 0) diff --git a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index 3f3d048..1f1cd94 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -3,7 +3,7 @@ from typing import Any, Union from openmm import NonbondedForce, System -from ...genutils.importutils import compile_simple_getable_attrs +from ...genutils.attrs import compile_simple_getable_attrs from ...genutils.textual.prettyprint import dict_to_indented_str diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index 1d944a4..13d7631 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ -9,7 +9,7 @@ from .labeling.bijection import bijective_atom_id_iter from ..genutils.decorators.functional import optional_in_place from ..genutils.typetools.categorical import _union_member_factory -from ..genutils.importutils import compile_simple_getable_attrs +from ..genutils.attrs import compile_simple_getable_attrs # RDKit-specific generics and type aliases From b72dfbd8f226eedee7e96002b105b72cb150f359 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 18:03:30 -0600 Subject: [PATCH 016/191] Added return type annotations to class decorators --- 
polymerist/genutils/decorators/classmod.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index f663106..786880a 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -1,10 +1,10 @@ '''Decorators for modifying classes''' -from typing import Callable, Iterable, Optional, TypeVar +from typing import Callable, Iterable, Optional, TypeVar, Union C = TypeVar('C') -def generate_repr(cls : Optional[C]=None, disp_attrs : Optional[Iterable[str]]=None, lookup_attr : Optional[str]=None): +def generate_repr(cls : Optional[C]=None, disp_attrs : Optional[Iterable[str]]=None, lookup_attr : Optional[str]=None) -> Union[C, Callable[[C], C]]: ''' Class decorator for auto-generating __repr__ methods @@ -33,7 +33,7 @@ def _repr_generic(self) -> str: return class_decorator return class_decorator(cls) # return literal class decorator call -def register_subclasses(cls : Optional[C]=None, key_attr : str='__name__', reg_attr : str='subclass_registry') -> Callable[[C], C]: +def register_subclasses(cls : Optional[C]=None, key_attr : str='__name__', reg_attr : str='subclass_registry') -> Union[C, Callable[[C], C]]: ''' Parametric class decorator for automatically generating a registry of subclasses of a target class Binds registry to the "registry" class property in the target class From b3ec0e414f5e005ac70f47868bec82a3348fad54 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 18:55:31 -0600 Subject: [PATCH 017/191] Wrote decorator which dynamically registers abstract class attributes --- polymerist/genutils/decorators/classmod.py | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index 786880a..1ddc020 100644 --- a/polymerist/genutils/decorators/classmod.py +++ 
b/polymerist/genutils/decorators/classmod.py @@ -57,3 +57,29 @@ def _registry(cls : C) -> dict[str, C]: if cls is None: # null case (i.e. call without parens), return factory call return class_decorator return class_decorator(cls) # return literal class decorator call + +# NOTE: "klass" is needed to distinguish between the class modified by this decorator and the classmethod arg when calling super() +# "klass" here is the parent, while "cls" is the child +def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: + '''Register a list of string attribute names as abstract class attributes, + which MUST be implemented by child classes of the wrapped class''' + def class_decorator(klass : C) -> C: + '''The actual (argument-free) class decorator''' + def __wrapped_init_subclass__(cls : C, **kwargs) -> None: + '''Wrapper for subclass definition which actually enforces that all named attributes are set''' + for attr_name in attr_names: + passed_attr_value = kwargs.pop(attr_name, NotImplemented) # want this removed from kwargs before passing to super, regardless of whether already set in child + attr_val_on_child = getattr(cls, attr_name, NotImplemented) # check if this has been set in the child in code + + if attr_val_on_child is NotImplemented: # if the value has not been set in code... + if passed_attr_value is not NotImplemented: # ...fall back to value passed into class definition, if it exists... 
+ attr_val_on_child = passed_attr_value + else: # otherwise, fail and raise Exception + raise TypeError(f"Can't instantiate abstract class {cls.__name__} with abstract class property '{attr_name}' undefined") + + super(klass, cls).__init_subclass__(**kwargs) # this should fail if extraneous named args are passed + + klass.__init_subclass__ = classmethod(__wrapped_init_subclass__) + return klass + + return class_decorator # no need for application check here, since the parameterized decorator doesn't take a class to be modified \ No newline at end of file From 00993733edd998c9c97a5b1081a3dd3209d7ec71 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 20:25:55 -0600 Subject: [PATCH 018/191] Fixed bug where class attribute was not actually being set in register_abstract_class_attrs --- polymerist/genutils/decorators/classmod.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index 1ddc020..592f469 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -60,7 +60,7 @@ def _registry(cls : C) -> dict[str, C]: # NOTE: "klass" is needed to distinguish between the class modified by this decorator and the classmethod arg when calling super() # "klass" here is the parent, while "cls" is the child -def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: +def register_abstract_class_attrs(*attr_names : list[str]) -> Callable[[C], C]: # TODO: add mechanism for typehinting '''Register a list of string attribute names as abstract class attributes, which MUST be implemented by child classes of the wrapped class''' def class_decorator(klass : C) -> C: @@ -73,7 +73,7 @@ def __wrapped_init_subclass__(cls : C, **kwargs) -> None: if attr_val_on_child is NotImplemented: # if the value has not been set in code... 
if passed_attr_value is not NotImplemented: # ...fall back to value passed into class definition, if it exists... - attr_val_on_child = passed_attr_value + setattr(cls, attr_name, passed_attr_value) else: # otherwise, fail and raise Exception raise TypeError(f"Can't instantiate abstract class {cls.__name__} with abstract class property '{attr_name}' undefined") From 05fd39130a4040ae7f506c065937f8943aea2c5d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 20:26:14 -0600 Subject: [PATCH 019/191] Created new genutils submodule for generic tree-related functionality --- polymerist/genutils/treetools.py | 108 +++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 polymerist/genutils/treetools.py diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py new file mode 100644 index 0000000..0b96f95 --- /dev/null +++ b/polymerist/genutils/treetools.py @@ -0,0 +1,108 @@ +'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' + +from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar +from abc import ABC, abstractmethod + +Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools +T = TypeVar('T') + +from anytree.node import Node +from anytree.exporter import DictExporter + +from .decorators.classmod import register_abstract_class_attrs + + +def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually + '''Produce a simplified tree for performing tests''' + root = Node('f') + b = Node('b', foo='bb', parent=root) + a = Node('a', foo='aa', parent=b) + d = Node('d', foo='dd', parent=b) + c = Node('c', foo='cc', parent=d) + e = Node('e', foo='ee', parent=d) + g = Node('g', foo='gg', parent=root) + i = Node('i', foo='ii', parent=g) + h = Node('h', foo='hh', parent=i) + + return root + + +# TREE COPYING +def copy_node_isolated(node : Node) -> Node: + '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' + node_attrs = { + attr_name : attr + for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + } + return Node(**node_attrs) + +# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree +def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: + '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' + if stop is None: + stop = lambda node : False + + node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes + assert(node_copy.children == tuple()) + if node.children is not None: + for child in node.children: + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + + return node_copy + + +# INTERFACES FOR BUILDING TREES FROM OTHER CLASSES +@register_abstract_class_attrs('FROMTYPE') +class AbstractNodeCorrespondence(ABC, Generic[T]): # in concrete implementations, the type of NODETYPE should match T + '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' + @abstractmethod + def name(self, obj : T) -> str: + '''Define how to obtain a string name''' + pass + + @abstractmethod + def has_children(self, obj : T) -> bool: + '''Define how to check if an object can produce children in the first place before attempting to do so''' + pass + + @abstractmethod + def children(self, obj : T) -> Optional[Iterable[T]]: + '''Define how to obtain node children from an instance + Should return NoneType if the instance is "leaf-like"''' + pass + +def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None) -> Callable[[T, Optional[int]], Node]: + '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist + if class_alias is None: # an alternative name to use when describing the tree creation for this class + class_alias = node_corresp.FROMTYPE.__name__ + + if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type + obj_attr_name = class_alias + + def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) -> Node: + # NOTE: 
deliberately omitting docstring here, as it will be built procedurally after defining this function + node = Node(name=node_corresp.name(obj)) + setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference + + if node_corresp.has_children(obj) and ( # recursively add subnodes IFF + (max_depth is None) # 1) no depth limit is set, or + or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet + ): + for child_obj in node_corresp.children(obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, _curr_depth=_curr_depth+1) + sub_node.parent = node + + return node + + # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) + compile_tree.__doc__ = f''' + Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object + + Any sub-{class_alias} encountered will be expanded into its own tree, + up to the specified maximum depth, or until exhaustion if max_depth=None + ''' + + return compile_tree \ No newline at end of file From bde603c0ee0dfed4bd57c45355d4322a5bf7a1b5 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 15 Aug 2024 21:01:31 -0600 Subject: [PATCH 020/191] Added option to filter (via "stop" Callable) in compile_tree() --- polymerist/genutils/treetools.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py index 0b96f95..6fe4efa 100644 --- a/polymerist/genutils/treetools.py +++ b/polymerist/genutils/treetools.py @@ -4,6 +4,8 @@ from abc import ABC, abstractmethod Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools +NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature) + T = TypeVar('T') from anytree.node import Node @@ -40,7 +42,7 @@ def copy_node_isolated(node : Node) -> Node: def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: 
'''Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches''' if stop is None: - stop = lambda node : False + stop = NULL_FILTER node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes assert(node_copy.children == tuple()) @@ -74,7 +76,12 @@ def children(self, obj : T) -> Optional[Iterable[T]]: Should return NoneType if the instance is "leaf-like"''' pass -def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None) -> Callable[[T, Optional[int]], Node]: +def compile_tree_factory( + node_corresp : AbstractNodeCorrespondence[T], + class_alias : Optional[str]=None, + obj_attr_name : Optional[str]=None, + + ) -> Callable[[T, Optional[int]], Node]: '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist if class_alias is None: # an alternative name to use when describing the tree creation for this class class_alias = node_corresp.FROMTYPE.__name__ @@ -82,8 +89,12 @@ def compile_tree_factory(node_corresp : AbstractNodeCorrespondence[T], class_ali if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) -> Node: + + def compile_tree(obj : T, max_depth : Optional[int]=None, stop : Optional[Filter]=None, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function + if stop is None: # to blacklist certain branches from being formed + stop = NULL_FILTER + node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference @@ -92,8 +103,9 @@ def compile_tree(obj : T, max_depth : Optional[int]=None, _curr_depth : int=0) or (_curr_depth < 
max_depth) # 2) a limit IS set, but hasn't been reached yet ): for child_obj in node_corresp.children(obj): - sub_node = compile_tree(child_obj, max_depth=max_depth, _curr_depth=_curr_depth+1) - sub_node.parent = node + if not stop(child_obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, stop=stop, _curr_depth=_curr_depth+1) + sub_node.parent = node return node From 4311da421c4033ab460cc77d10b4bb8bc9ecd1b1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 13:22:20 -0600 Subject: [PATCH 021/191] Broke treetools into component submodules --- polymerist/genutils/treetools.py | 120 -------------------- polymerist/genutils/treetools/__init__.py | 7 ++ polymerist/genutils/treetools/test_trees.py | 17 +++ polymerist/genutils/treetools/treecopy.py | 34 ++++++ polymerist/genutils/treetools/treeinter.py | 70 ++++++++++++ 5 files changed, 128 insertions(+), 120 deletions(-) delete mode 100644 polymerist/genutils/treetools.py create mode 100644 polymerist/genutils/treetools/__init__.py create mode 100644 polymerist/genutils/treetools/test_trees.py create mode 100644 polymerist/genutils/treetools/treecopy.py create mode 100644 polymerist/genutils/treetools/treeinter.py diff --git a/polymerist/genutils/treetools.py b/polymerist/genutils/treetools.py deleted file mode 100644 index 6fe4efa..0000000 --- a/polymerist/genutils/treetools.py +++ /dev/null @@ -1,120 +0,0 @@ -'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' - -from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar -from abc import ABC, abstractmethod - -Filter : TypeAlias = Callable[[Any], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter = lambda inp : False # a filter which doesn't do anything (but has the right call signature) - -T = TypeVar('T') - -from anytree.node import Node -from anytree.exporter import DictExporter - -from .decorators.classmod import register_abstract_class_attrs - - -def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually - '''Produce a simplified tree for performing tests''' - root = Node('f') - b = Node('b', foo='bb', parent=root) - a = Node('a', foo='aa', parent=b) - d = Node('d', foo='dd', parent=b) - c = Node('c', foo='cc', parent=d) - e = Node('e', foo='ee', parent=d) - g = Node('g', foo='gg', parent=root) - i = Node('i', foo='ii', parent=g) - h = Node('h', foo='hh', parent=i) - - return root - - -# TREE COPYING -def copy_node_isolated(node : Node) -> Node: - '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' - node_attrs = { - attr_name : attr - for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children - } - return Node(**node_attrs) - -# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter]=None) -> Node: - '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - if stop is None: - stop = NULL_FILTER - - node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes - assert(node_copy.children == tuple()) - if node.children is not None: - for child in node.children: - if stop(child): - continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion - child_copy.parent = node_copy - - return node_copy - - -# INTERFACES FOR BUILDING TREES FROM OTHER CLASSES -@register_abstract_class_attrs('FROMTYPE') -class AbstractNodeCorrespondence(ABC, Generic[T]): # in concrete implementations, the type of NODETYPE should match T - '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' - @abstractmethod - def name(self, obj : T) -> str: - '''Define how to obtain a string name''' - pass - - @abstractmethod - def has_children(self, obj : T) -> bool: - '''Define how to check if an object can produce children in the first place before attempting to do so''' - pass - - @abstractmethod - def children(self, obj : T) -> Optional[Iterable[T]]: - '''Define how to obtain node children from an instance - Should return NoneType if the instance is "leaf-like"''' - pass - -def compile_tree_factory( - node_corresp : AbstractNodeCorrespondence[T], - class_alias : Optional[str]=None, - obj_attr_name : Optional[str]=None, - - ) -> Callable[[T, Optional[int]], Node]: - '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist - if class_alias is None: # an alternative name to use when describing the tree creation for this class - class_alias = node_corresp.FROMTYPE.__name__ - - if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type - obj_attr_name = class_alias - - - def compile_tree(obj : T, max_depth : Optional[int]=None, stop : Optional[Filter]=None, 
_curr_depth : int=0) -> Node: - # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function - if stop is None: # to blacklist certain branches from being formed - stop = NULL_FILTER - - node = Node(name=node_corresp.name(obj)) - setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference - - if node_corresp.has_children(obj) and ( # recursively add subnodes IFF - (max_depth is None) # 1) no depth limit is set, or - or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet - ): - for child_obj in node_corresp.children(obj): - if not stop(child_obj): - sub_node = compile_tree(child_obj, max_depth=max_depth, stop=stop, _curr_depth=_curr_depth+1) - sub_node.parent = node - - return node - - # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) - compile_tree.__doc__ = f''' - Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object - - Any sub-{class_alias} encountered will be expanded into its own tree, - up to the specified maximum depth, or until exhaustion if max_depth=None - ''' - - return compile_tree \ No newline at end of file diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/treetools/__init__.py new file mode 100644 index 0000000..de6724f --- /dev/null +++ b/polymerist/genutils/treetools/__init__.py @@ -0,0 +1,7 @@ +'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' + +from typing import Any, Callable, TypeVar + +T = TypeVar('T') +Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools +NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) diff --git a/polymerist/genutils/treetools/test_trees.py b/polymerist/genutils/treetools/test_trees.py new file mode 100644 index 0000000..1eccda2 --- /dev/null +++ b/polymerist/genutils/treetools/test_trees.py @@ -0,0 +1,17 @@ +'''Unit tests for trees''' + +from anytree.node import Node + +def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually + '''Produce a simplified tree for performing tests''' + root = Node('f') + b = Node('b', foo='bb', parent=root) + a = Node('a', foo='aa', parent=b) + d = Node('d', foo='dd', parent=b) + c = Node('c', foo='cc', parent=d) + e = Node('e', foo='ee', parent=d) + g = Node('g', foo='gg', parent=root) + i = Node('i', foo='ii', parent=g) + h = Node('h', foo='hh', parent=i) + + return root \ No newline at end of file diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py new file mode 100644 index 0000000..2d96717 --- /dev/null +++ b/polymerist/genutils/treetools/treecopy.py @@ -0,0 +1,34 @@ +'''Tools for copying parts and wholes of trees, at various levels of resolution''' + +from typing import Optional + +from anytree.node import Node +from anytree.exporter import DictExporter + +from . 
import Filter, NULL_FILTER + + +def copy_node_isolated(node : Node) -> Node: + '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' + node_attrs = { + attr_name : attr + for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + } + return Node(**node_attrs) + +# NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree +def copy_tree(node : Node, stop : Optional[Filter[Node]]=None) -> Node: + '''Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches''' + if stop is None: + stop = NULL_FILTER + + node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes + assert(node_copy.children == tuple()) + if node.children is not None: + for child in node.children: + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + + return node_copy \ No newline at end of file diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py new file mode 100644 index 0000000..930a853 --- /dev/null +++ b/polymerist/genutils/treetools/treeinter.py @@ -0,0 +1,70 @@ +'''Tools for interfacing and representing arbitrary external classes with tree-like data structures''' + +from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar +from abc import ABC, abstractmethod + +T = TypeVar('T') + +from anytree.node import Node +from anytree.exporter import DictExporter + +from ..decorators.classmod import register_abstract_class_attrs +from . 
import Filter, NULL_FILTER + + +@register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE +class AbstractNodeCorrespondence(ABC, Generic[T]): + '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' + @abstractmethod + def name(self, obj : T) -> str: + '''Define how to obtain a string name''' + pass + + @abstractmethod + def has_children(self, obj : T) -> bool: + '''Define how to check if an object can produce children in the first place before attempting to do so''' + pass + + @abstractmethod + def children(self, obj : T) -> Optional[Iterable[T]]: + '''Define how to obtain node children from an instance + Should return NoneType if the instance is "leaf-like"''' + pass + +def compile_tree_factory( + node_corresp : AbstractNodeCorrespondence[T], + class_alias : Optional[str]=None, + obj_attr_name : Optional[str]=None, + ) -> Callable[[T, Optional[int]], Node]: + '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist + if class_alias is None: # an alternative name to use when describing the tree creation for this class + class_alias = node_corresp.FROMTYPE.__name__ + + if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type + obj_attr_name = class_alias + + def compile_tree(obj : T, max_depth : Optional[int]=None, exclude : Optional[Filter[T]]=NULL_FILTER, _curr_depth : int=0) -> Node: + # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function + node = Node(name=node_corresp.name(obj)) + setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference + + if node_corresp.has_children(obj) and ( # recursively add subnodes IFF + (max_depth is None) # 1) no depth limit is set, or + or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet + ): + for 
child_obj in node_corresp.children(obj): + if not exclude(child_obj): + sub_node = compile_tree(child_obj, max_depth=max_depth, exclude=exclude, _curr_depth=_curr_depth+1) + sub_node.parent = node + + return node + + # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) + compile_tree.__doc__ = f''' + Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object + + Any sub-{class_alias} encountered will be expanded into its own tree, + up to the specified maximum depth, or until exhaustion if max_depth=None + ''' + + return compile_tree \ No newline at end of file From c2205edefbaa56dcf82f51ce2fcd6723cdd14b04 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 14:41:52 -0600 Subject: [PATCH 022/191] Moved filter typehints to their own dedicated module --- polymerist/genutils/filters.py | 8 ++++++++ polymerist/genutils/treetools/__init__.py | 8 +------- polymerist/genutils/treetools/treecopy.py | 13 +++++-------- polymerist/genutils/treetools/treeinter.py | 4 ++-- 4 files changed, 16 insertions(+), 17 deletions(-) create mode 100644 polymerist/genutils/filters.py diff --git a/polymerist/genutils/filters.py b/polymerist/genutils/filters.py new file mode 100644 index 0000000..bb2eb80 --- /dev/null +++ b/polymerist/genutils/filters.py @@ -0,0 +1,8 @@ +'''Typehinting and generic implementations of filter (indicator) functions''' + +from typing import Callable, TypeVar + +T = TypeVar('T') +Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools + +NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) \ No newline at end of file diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/treetools/__init__.py index de6724f..b555128 100644 --- a/polymerist/genutils/treetools/__init__.py +++ b/polymerist/genutils/treetools/__init__.py @@ -1,7 +1 @@ -'''Generic functionality for tree-like data 
structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' - -from typing import Any, Callable, TypeVar - -T = TypeVar('T') -Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) +'''Generic functionality for tree-like data structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' \ No newline at end of file diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 2d96717..747790e 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -5,25 +5,22 @@ from anytree.node import Node from anytree.exporter import DictExporter -from . import Filter, NULL_FILTER +from ..filters import Filter, NULL_FILTER -def copy_node_isolated(node : Node) -> Node: +def copy_node_attrs(node : Node) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children } + # assert(node_copy.children == tuple()) return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=None) -> Node: +def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - if stop is None: - stop = NULL_FILTER - - node_copy = copy_node_isolated(node) # make a read-only copy of JUST the current node's attributes - assert(node_copy.children == tuple()) + node_copy = copy_node_attrs(node) # make a read-only copy of JUST the current node's attributes if node.children is not None: for child in node.children: if stop(child): diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py index 930a853..24966d0 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ b/polymerist/genutils/treetools/treeinter.py @@ -9,7 +9,7 @@ from anytree.exporter import DictExporter from ..decorators.classmod import register_abstract_class_attrs -from . import Filter, NULL_FILTER +from ..filters import Filter, NULL_FILTER @register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE @@ -43,7 +43,7 @@ def compile_tree_factory( if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : T, max_depth : Optional[int]=None, exclude : Optional[Filter[T]]=NULL_FILTER, _curr_depth : int=0) -> Node: + def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Optional[Filter[node_corresp.FROMTYPE]]=NULL_FILTER, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference From 0c32bcacefcb8ed28d7f736a12f63359b88abcf4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 15:19:30 -0600 Subject: [PATCH 023/191] Added option to filter down node attributes when copying nodes/trees --- polymerist/genutils/treetools/treecopy.py | 20 
++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 747790e..132ee2a 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -8,24 +8,24 @@ from ..filters import Filter, NULL_FILTER -def copy_node_attrs(node : Node) -> Node: +def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children + if attr_filter(attr_name) } # assert(node_copy.children == tuple()) return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER) -> Node: +def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER, attr_filter : Filter[str]=NULL_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - node_copy = copy_node_attrs(node) # make a read-only copy of JUST the current node's attributes - if node.children is not None: - for child in node.children: - if stop(child): - continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion - child_copy.parent = node_copy - + node_copy = copy_node_attrs(node, attr_filter=attr_filter) # make a read-only copy of JUST the current node's attributes + for child in node.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple + if stop(child): + continue + child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy.parent = node_copy + return node_copy \ No newline at end of file From 976346a77955c3220cef540abe0644bc0cbba904 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 15:20:51 -0600 Subject: [PATCH 024/191] Renamed compiled_simple_getable_attrs() to compile_argfree_getable_attrs() to make intent clearer --- polymerist/genutils/attrs.py | 2 +- polymerist/mdtools/openmmtools/description.py | 4 ++-- polymerist/rdutils/rdprops.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index 7603c60..febcf08 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -4,7 +4,7 @@ import re -def compile_simple_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: +def compile_argfree_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: '''Takes an object and returns a dict of the return values of all argument-free methods of the objects Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' getable_dict = {} diff --git 
a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index 1f1cd94..b488fe9 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -3,7 +3,7 @@ from typing import Any, Union from openmm import NonbondedForce, System -from ...genutils.attrs import compile_simple_getable_attrs +from ...genutils.attrs import compile_argfree_getable_attrs from ...genutils.textual.prettyprint import dict_to_indented_str @@ -30,7 +30,7 @@ def describe_forces(ommsys : System, as_str : bool=False) -> Union[str, dict[str '''Provides a dictionary (keyed by force names) which summarizes the parameters of each Force in an OpenMM system''' force_desc_dict = {} for force in ommsys.getForces(): - force_attrs = compile_simple_getable_attrs(force, getter_str='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name + force_attrs = compile_argfree_getable_attrs(force, getter_str='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name force_attrs['Type'] = type(force).__name__ if (nonbond_id := force_attrs.get(NONBOND_METHOD_KEY)) is not None: diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index 13d7631..ed209a3 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ -9,7 +9,7 @@ from .labeling.bijection import bijective_atom_id_iter from ..genutils.decorators.functional import optional_in_place from ..genutils.typetools.categorical import _union_member_factory -from ..genutils.attrs import compile_simple_getable_attrs +from ..genutils.attrs import compile_argfree_getable_attrs # RDKit-specific generics and type aliases @@ -65,7 +65,7 @@ # PROPERTY INSPECTION FUNCTIONS def detailed_rdobj_info(rdobj : RDObj) -> dict[str, Any]: '''Extract all get-able info about a particular RDKit atom. Does NOT include any non-default Prop values (e.g. 
atomMapNumber)''' - return compile_simple_getable_attrs(rdobj, getter_str='Get', repl_str='') + return compile_argfree_getable_attrs(rdobj, getter_str='Get', repl_str='') def atom_ids_with_prop(rdmol : Mol, prop_name : str) -> list[int]: '''Returns list of atom IDs of atom which have a particular property assigned''' From 1049b0a1c8c745459d2c3d3e9b9d25e05f9c3967 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 16:06:11 -0600 Subject: [PATCH 025/191] Added option to filter attributes when copying a Node or tree --- polymerist/genutils/treetools/treecopy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 132ee2a..eb4ed9f 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -5,10 +5,10 @@ from anytree.node import Node from anytree.exporter import DictExporter -from ..filters import Filter, NULL_FILTER +from ..filters import Filter, ALWAYS_TRUE_FILTER, ALWAYS_FALSE_FILTER -def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: +def copy_node_attrs(node : Node, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' node_attrs = { attr_name : attr @@ -19,13 +19,13 @@ def copy_node_attrs(node : Node, attr_filter : Filter[str]=NULL_FILTER) -> Node: return Node(**node_attrs) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Optional[Filter[Node]]=NULL_FILTER, attr_filter : Filter[str]=NULL_FILTER) -> Node: +def copy_tree(node : Node, stop : Filter[Node]=ALWAYS_FALSE_FILTER, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' node_copy = copy_node_attrs(node, attr_filter=attr_filter) # make a read-only copy of JUST the current node's attributes for child in node.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple if stop(child): continue - child_copy = copy_tree(child, stop=stop) # recursively copy children until stop criterion + child_copy = copy_tree(child, stop=stop, attr_filter=attr_filter) # recursively copy children until stop criterion child_copy.parent = node_copy return node_copy \ No newline at end of file From 687b50d0d89a5655ec48dee67c46ecb51c622631 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Fri, 16 Aug 2024 16:06:43 -0600 Subject: [PATCH 026/191] Added explicit "ALWAYS_TRUE" and "ALWAYS_FALSE" filters to cover alternate use-cases --- polymerist/genutils/filters.py | 8 +++++++- polymerist/genutils/treetools/treeinter.py | 9 ++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/filters.py b/polymerist/genutils/filters.py index bb2eb80..18ceeed 100644 --- a/polymerist/genutils/filters.py +++ b/polymerist/genutils/filters.py @@ -5,4 +5,10 @@ T = TypeVar('T') Filter = Callable[[T], bool] # TODO: move this to somewhere in typetools -NULL_FILTER : Filter[T] = lambda inp : False # a filter which doesn't do anything (but has the right call signature) \ No newline at end of file +# "TRIVIAL" filters to use as defaults when the Filter call signature is required +ALWAYS_TRUE_FILTER : Filter[T] = lambda inp : True +ALWAYS_FALSE_FILTER : Filter[T] = lambda inp : False + +# aliases for trivial filters for many use-cases +NEVER_FALSE_FILTER = MARK_ALL_FILTER = FLAG_ALL_FILTER = ALWAYS_TRUE_FILTER +NEVER_TRUE_FILTER = MARK_NONE_FILTER = FLAG_NONE_FILTER = ALWAYS_FALSE_FILTER \ No newline at end of file diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py index 
24966d0..9db3b89 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ b/polymerist/genutils/treetools/treeinter.py @@ -3,13 +3,12 @@ from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar from abc import ABC, abstractmethod -T = TypeVar('T') - from anytree.node import Node -from anytree.exporter import DictExporter from ..decorators.classmod import register_abstract_class_attrs -from ..filters import Filter, NULL_FILTER +from ..filters import Filter, ALWAYS_FALSE_FILTER + +T = TypeVar('T') @register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE @@ -43,7 +42,7 @@ def compile_tree_factory( if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Optional[Filter[node_corresp.FROMTYPE]]=NULL_FILTER, _curr_depth : int=0) -> Node: + def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Filter[node_corresp.FROMTYPE]=ALWAYS_FALSE_FILTER, _curr_depth : int=0) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference From ca051cfa7433da3db9fb994fba005f119bce322b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 12:43:22 -0700 Subject: [PATCH 027/191] Removed "Abstract" prefix to make class name "NodeCorrespondence" to avoid confusion --- polymerist/genutils/treetools/treeinter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treeinter.py index 9db3b89..5ab949e 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ 
b/polymerist/genutils/treetools/treeinter.py @@ -12,7 +12,7 @@ @register_abstract_class_attrs('FROMTYPE') # TODO: figure out way to parameterize Generic T here with the type passed as FROMTYPE -class AbstractNodeCorrespondence(ABC, Generic[T]): +class NodeCorrespondence(ABC, Generic[T]): '''Abstract base for implementing how to build an anytree Node tree for an arbitrary class''' @abstractmethod def name(self, obj : T) -> str: @@ -31,7 +31,7 @@ def children(self, obj : T) -> Optional[Iterable[T]]: pass def compile_tree_factory( - node_corresp : AbstractNodeCorrespondence[T], + node_corresp : NodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None, ) -> Callable[[T, Optional[int]], Node]: From 583c5b98644bb06b3b403661c9e85f23a622b20f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 12:43:42 -0700 Subject: [PATCH 028/191] Remaned "getter_str" to "getter_re" to make intent and usage clearer --- polymerist/genutils/attrs.py | 8 ++++---- polymerist/mdtools/openmmtools/description.py | 2 +- polymerist/rdutils/rdprops.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index febcf08..ddd40d0 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -4,14 +4,14 @@ import re -def compile_argfree_getable_attrs(obj : Any, getter_str : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: +def compile_argfree_getable_attrs(obj : Any, getter_re : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: '''Takes an object and returns a dict of the return values of all argument-free methods of the objects - Looks for methods of the object whose names contain with "getter_str", and can replace this with the value of "repl_str" in the final dict output if provided''' + Looks for methods of the object whose names contain with "getter_re", and can replace this with the value of "repl_str" in the final dict output if 
provided''' getable_dict = {} for attr_name in dir(obj): - if re.search(getter_str, attr_name): + if re.search(getter_re, attr_name): try: - attr_key = attr_name if (repl_str is None) else re.sub(getter_str, repl_str, attr_name) + attr_key = attr_name if (repl_str is None) else re.sub(getter_re, repl_str, attr_name) getable_dict[attr_key] = getattr(obj, attr_name)() except (TypeError, Exception): # TODO : find way to selectively intercept the Boost C++ wrapper ArgumentError pass diff --git a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index b488fe9..f5578b0 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -30,7 +30,7 @@ def describe_forces(ommsys : System, as_str : bool=False) -> Union[str, dict[str '''Provides a dictionary (keyed by force names) which summarizes the parameters of each Force in an OpenMM system''' force_desc_dict = {} for force in ommsys.getForces(): - force_attrs = compile_argfree_getable_attrs(force, getter_str='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name + force_attrs = compile_argfree_getable_attrs(force, getter_re='\Aget', repl_str='') # getter string here asserts that "get" is at the start of the attribute name force_attrs['Type'] = type(force).__name__ if (nonbond_id := force_attrs.get(NONBOND_METHOD_KEY)) is not None: diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index ed209a3..1d14975 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ -65,7 +65,7 @@ # PROPERTY INSPECTION FUNCTIONS def detailed_rdobj_info(rdobj : RDObj) -> dict[str, Any]: '''Extract all get-able info about a particular RDKit atom. Does NOT include any non-default Prop values (e.g. 
atomMapNumber)''' - return compile_argfree_getable_attrs(rdobj, getter_str='Get', repl_str='') + return compile_argfree_getable_attrs(rdobj, getter_re='Get', repl_str='') def atom_ids_with_prop(rdmol : Mol, prop_name : str) -> list[int]: '''Returns list of atom IDs of atom which have a particular property assigned''' From 64dd928e4dda8ae38327cc9a83a9630765848ea4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 13:12:05 -0700 Subject: [PATCH 029/191] Expanded docstring to conform to numpy docstring standard --- polymerist/genutils/attrs.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index ddd40d0..175fdb0 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -1,12 +1,32 @@ '''For dynamically inspecting and modifying attributes of Python objects''' -from typing import Any, Optional +from typing import Any, Optional, Union import re -def compile_argfree_getable_attrs(obj : Any, getter_re : str='get', repl_str : Optional[str]=None) -> dict[str, Any]: - '''Takes an object and returns a dict of the return values of all argument-free methods of the objects - Looks for methods of the object whose names contain with "getter_re", and can replace this with the value of "repl_str" in the final dict output if provided''' +def compile_argfree_getable_attrs(obj : Any, getter_re : Union[str, re.Pattern]='.*', repl_str : Optional[str]=None) -> dict[str, Any]: + '''Compile the values of all methods of an object which require no arguments other than perhaps the object itself (this includes properties) + Returns a dict whose keys are the names of the methods called and whose values are the return values of those object methods + + Can optionally filter the names of returned method using a regular expression, passed to "getter_re" + Can also optionally replace the chosen regex with an arbitrary string (including the empty string), 
passed to "repl_str" + + Parameters + ---------- + obj : Any + Any object instance + getter_re : str or re.Pattern (optional), default ".*" + Optional regular expression to use for filtering down returned methods + Only methods whose names match the target regex are returns + repl_str : str (optional) + If provided, will replace the + for example, repl_str="" can be used to delete the regex from returned method names + + Returns + ------- + getable_dict : dict[str, Any] + dict whose keys are the selected method names and whose values are the corresponding method returns + ''' getable_dict = {} for attr_name in dir(obj): if re.search(getter_re, attr_name): From 5258458e9b04312059b187a11a19c7d9c0f486b0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 13:51:44 -0700 Subject: [PATCH 030/191] Wrote unit tests for genutils.attrs --- polymerist/genutils/attrs.py | 2 +- polymerist/tests/genutils/test_attrs.py | 76 +++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 polymerist/tests/genutils/test_attrs.py diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index 175fdb0..f94a9dd 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -5,7 +5,7 @@ def compile_argfree_getable_attrs(obj : Any, getter_re : Union[str, re.Pattern]='.*', repl_str : Optional[str]=None) -> dict[str, Any]: - '''Compile the values of all methods of an object which require no arguments other than perhaps the object itself (this includes properties) + '''Compile the values of all methods of an object which require no arguments other than perhaps the object itself (this EXCLUDES properties) Returns a dict whose keys are the names of the methods called and whose values are the return values of those object methods Can optionally filter the names of returned method using a regular expression, passed to "getter_re" diff --git a/polymerist/tests/genutils/test_attrs.py 
b/polymerist/tests/genutils/test_attrs.py new file mode 100644 index 0000000..c594bf0 --- /dev/null +++ b/polymerist/tests/genutils/test_attrs.py @@ -0,0 +1,76 @@ +'''Unit test for attribute inspection''' + +from polymerist.genutils.attrs import compile_argfree_getable_attrs +import pytest + + +class ArgfreeGettableAttrTest(): + '''Dummy class for testing that dynamic attribute inspection works properly''' + FOO = 'bar' + + def __init__(self, answer : int=42, spam : str='eggs') -> None: + self.answer = answer + self.spam = spam + + def get_answer(self) -> int: + '''Prototypical getter which returns an attribute of self and is prefixed by "get"''' + return self.answer + + def stringy_answer(self) -> str: + '''A getter whose name does NOT contain any form of "get", but which nevertheless qualifies as argument-free''' + return str(self.answer) + + def GetSpam(self) -> str: + '''Camel-case getter to check that this is correctly handled by regex''' + return self.spam + + @property + def get_answer_prop(self) -> str: + '''Property getter, to test that inspection excludes this''' + return self.answer + + @classmethod + def get_foo(cls) -> str: + '''Class attr-based getter to test how shared-namespace attrs are handled''' + return cls.FOO + + def _get_spam(self) -> int: + '''Getter which is marked "private" and should not be returned ''' + return self.spam + + def get_echo(self, val : str) -> str: + '''Getter which requires an argument passed and is therefore not argument-free''' + return val + +@pytest.fixture +def testobj() -> ArgfreeGettableAttrTest: + return ArgfreeGettableAttrTest(answer=57, spam='ham') + + +# NOTE: not checking with getter_re='get' as this also returns __getstate__, which is somewhat unexpected to handle + +def test_getable_attrs_regex_any(testobj): + '''Test if getable arg regex match hits anywhere in method name''' + attrs = compile_argfree_getable_attrs(testobj, getter_re='answer') # regex match should also hit in MIDDLE of string + assert attrs 
== {'get_answer': testobj.answer, 'stringy_answer' : str(testobj.answer)} + +def test_getable_attrs_regex_start(testobj): + '''Test if getable arg regex match hits only at the start of in method name''' + attrs = compile_argfree_getable_attrs(testobj, getter_re='^get') # regex match should also hit in MIDDLE of string + assert attrs == {'get_answer': testobj.answer, 'get_foo' : testobj.FOO} + +def test_getable_attrs_regex_case_insensitive(testobj): + '''Test if getable arg regex match hits find method with regex only at the start of in method name irrespective of case''' + attrs = compile_argfree_getable_attrs(testobj, getter_re='^[gG]et')#, getter_re='get') + assert attrs == {'GetSpam': testobj.spam, 'get_answer': testobj.answer, 'get_foo': testobj.FOO} + +def test_getable_attrs_regex_camel(testobj): + '''Test if getable arg regex match hits camel-cased ''' + attrs = compile_argfree_getable_attrs(testobj, getter_re='^Get[A-Z].*') # regex match should also hit in MIDDLE of string + assert attrs == {'GetSpam' : testobj.spam} + +def test_getable_attrs_regex_repl(testobj): + '''Test if regex replacement on returned method names is performed as expected''' + repl = 'TEST' + attrs = compile_argfree_getable_attrs(testobj, getter_re='^get', repl_str=repl) # regex match should also hit in MIDDLE of string + assert attrs == {f'{repl}_answer': testobj.answer, f'{repl}_foo': testobj.FOO} \ No newline at end of file From 92cc73c781e30e446f68c3d75e436b8532c24ec1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 14:15:53 -0700 Subject: [PATCH 031/191] Expanded path emptiness checks to explicitly handle both files and dirs --- polymerist/genutils/fileutils/pathutils.py | 31 +++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/polymerist/genutils/fileutils/pathutils.py b/polymerist/genutils/fileutils/pathutils.py index 103c044..e77c756 100644 --- a/polymerist/genutils/fileutils/pathutils.py +++ 
b/polymerist/genutils/fileutils/pathutils.py @@ -13,11 +13,34 @@ def dotless(path : Path) -> str: '''Separate the dot from file path. Returns the original suffix if not dot is present''' return _dotless(path.suffix) -def is_empty(path : Path) -> bool: - '''Check if a directory is empty''' - assert(path.is_dir()) - return list(path.iterdir()) == [] # can't use "len" for generators : TODO : make this more efficient (i.e. iteration-based) for large directories +# EMPTINESS CHECKS +def is_empty_dir(dirpath : Path) -> bool: + '''Check if a directory contains no files''' + if not dirpath.is_dir(): + raise NotADirectoryError(f'dirpath must point to directory, not to file "{dirpath}"') + return list(dirpath.iterdir()) == [] # can't use "len" for generators : TODO : make this more efficient (i.e. iteration-based) for large directories + +def is_empty_file(filepath : Path) -> bool: + '''Check if a file contains no data''' + if filepath.is_dir(): + raise IsADirectoryError(f'filepath must point to file, not to directory "{filepath}"') + # NOTE: not checking file existence here, as calling stat() will already do this check (and raise appropriate error) + + return filepath.stat().st_size == 0 + +def is_empty(path : Path) -> bool: + '''Flexibly check whether a path is "empty" + If path point to a file, returns whether the file contains data + If path points to a directory, returns whether the directory contains any files (empty or otherwise) + ''' + if path.is_dir(): + return is_empty_dir(path) + elif path.is_file(): + return is_empty_file(path) + else: + return FileNotFoundError + # PATH CREATION FUNCTIONS def assemble_path(directory : Path, prefix : str, extension : str, postfix : str='') -> Path: From f093de81119295fe88cb61f22ed068d80c75d752 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 14:37:51 -0700 Subject: [PATCH 032/191] Renamed to "treebase" and updated module docstring --- polymerist/genutils/treetools/{treeinter.py => treebase.py} | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) rename polymerist/genutils/treetools/{treeinter.py => treebase.py} (96%) diff --git a/polymerist/genutils/treetools/treeinter.py b/polymerist/genutils/treetools/treebase.py similarity index 96% rename from polymerist/genutils/treetools/treeinter.py rename to polymerist/genutils/treetools/treebase.py index 5ab949e..9db976f 100644 --- a/polymerist/genutils/treetools/treeinter.py +++ b/polymerist/genutils/treetools/treebase.py @@ -1,4 +1,4 @@ -'''Tools for interfacing and representing arbitrary external classes with tree-like data structures''' +'''Interfaces for encoding arbitrary classes into tree-like data structures''' from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar from abc import ABC, abstractmethod From cb2681419083c8a944ccc1ec5c8076f137a90574 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 15:14:07 -0700 Subject: [PATCH 033/191] Expanded docstrings, added exclude filter mixin and check for Node attribute overrides --- polymerist/genutils/treetools/treebase.py | 58 ++++++++++++++++++++--- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/treetools/treebase.py b/polymerist/genutils/treetools/treebase.py index 9db976f..af3c40d 100644 --- a/polymerist/genutils/treetools/treebase.py +++ b/polymerist/genutils/treetools/treebase.py @@ -34,16 +34,50 @@ def compile_tree_factory( node_corresp : NodeCorrespondence[T], class_alias : Optional[str]=None, obj_attr_name : Optional[str]=None, - ) -> Callable[[T, Optional[int]], Node]: - '''Factory method for producing a tree-generating function for the given Type''' # TODO: include blacklist + exclude_mixin : Optional[Filter[T]]=None, + ) -> Callable[[T, Optional[int], Optional[Filter[T]]], Node]: + ''' + Factory method for producing a tree-generating function from a NodeCorrespondence + + Parameters + ---------- + node_corresp : NodeCorrespondence[T] + Definition of a correpondence between an arbitrary 
type and a Tree Node + class_alias : str (optional) + Name of the corresponding class to inject into docstring + If not provided, will default to the __name__ of the class wrapped by node_corresp + obj_attr_name : str (optional) + The name of the Node attribute to which a copy of the class instance should be bound + If not provided, will default to the value of class_alias + exclude_mixin : Filter[T] (optional) + An optional "master" filter to mix into any + + Returns + ------- + compile_tree : Callable[[T, Optional[int], Optional[Filter[T]]], Node] + Factory function which takes an instance of type T, builds a Tree from it, and returns the root Node + ''' + node_type_name = node_corresp.FROMTYPE.__name__ if class_alias is None: # an alternative name to use when describing the tree creation for this class - class_alias = node_corresp.FROMTYPE.__name__ + class_alias = node_type_name if obj_attr_name is None: # the name given to the Node attribute which store an instance of the given arbitrary type obj_attr_name = class_alias - - def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, exclude : Filter[node_corresp.FROMTYPE]=ALWAYS_FALSE_FILTER, _curr_depth : int=0) -> Node: + if hasattr(Node, obj_attr_name): + raise AttributeError(f'Invalid value for obj_attr_name; attribute "{obj_attr_name}" clashes with existing attribute Node.{obj_attr_name}') + + if exclude_mixin is None: + exclude_mixin = ALWAYS_FALSE_FILTER + + def compile_tree( + obj : node_corresp.FROMTYPE, + max_depth : Optional[int]=None, + exclude : Filter[node_corresp.FROMTYPE]=ALWAYS_FALSE_FILTER, + _curr_depth : int=0 + ) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function + exclude = lambda obj : exclude_mixin(obj) | exclude(obj) # incorporate mixin into exclusion criterion # TODO: make this more efficient for double-defaults + node = Node(name=node_corresp.name(obj)) setattr(node, obj_attr_name, obj) # keep an 
instance of the object directly for reference @@ -60,10 +94,22 @@ def compile_tree(obj : node_corresp.FROMTYPE, max_depth : Optional[int]=None, e # annoyingly, docstrings must be string literals (this CAN'T be done inside the function definition) compile_tree.__doc__ = f''' - Compile a {class_alias} tree from a(n) {node_corresp.FROMTYPE.__name__} object + Compile a {class_alias} tree from a(n) {node_type_name} object Any sub-{class_alias} encountered will be expanded into its own tree, up to the specified maximum depth, or until exhaustion if max_depth=None + + Parameters + ---------- + obj : {node_type_name} + A(n) instance of a {class_alias} + max_depth : int (optional) + Maximum allowed height of a constructed tree from the root + If None (as default), no limit is set + exclude : Filter[{node_type_name}] (optional) + An optional filter function to reduce the size of the constructed tree + Must accept a(n) {node_type_name} instance as single argument + Should return True when a node is to be excluded, and False otherwise ''' return compile_tree \ No newline at end of file From 7c411be7460f7c07342b3d8422222925d3138c03 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 15:15:52 -0700 Subject: [PATCH 034/191] Fixed exclude_mixin recursion depth bug --- polymerist/genutils/treetools/treebase.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/polymerist/genutils/treetools/treebase.py b/polymerist/genutils/treetools/treebase.py index af3c40d..4530480 100644 --- a/polymerist/genutils/treetools/treebase.py +++ b/polymerist/genutils/treetools/treebase.py @@ -76,8 +76,6 @@ def compile_tree( _curr_depth : int=0 ) -> Node: # NOTE: deliberately omitting docstring here, as it will be built procedurally after defining this function - exclude = lambda obj : exclude_mixin(obj) | exclude(obj) # incorporate mixin into exclusion criterion # TODO: make this more efficient for double-defaults - node = Node(name=node_corresp.name(obj)) 
setattr(node, obj_attr_name, obj) # keep an instance of the object directly for reference @@ -86,7 +84,7 @@ def compile_tree( or (_curr_depth < max_depth) # 2) a limit IS set, but hasn't been reached yet ): for child_obj in node_corresp.children(obj): - if not exclude(child_obj): + if not (exclude(child_obj) or exclude_mixin(child_obj)): sub_node = compile_tree(child_obj, max_depth=max_depth, exclude=exclude, _curr_depth=_curr_depth+1) sub_node.parent = node From 4b8b73a2eb936e3ee60f2b737543b4b1e7c3193d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:05:37 -0700 Subject: [PATCH 035/191] Added treetools submodule for visualizing trees --- polymerist/genutils/treetools/__init__.py | 6 +++++- polymerist/genutils/treetools/treeviz.py | 26 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 polymerist/genutils/treetools/treeviz.py diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/treetools/__init__.py index b555128..c93e9b7 100644 --- a/polymerist/genutils/treetools/__init__.py +++ b/polymerist/genutils/treetools/__init__.py @@ -1 +1,5 @@ -'''Generic functionality for tree-like data structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' \ No newline at end of file +'''Generic functionality for tree-like data structures. 
Based on the anytree module (https://github.com/c0fec0de/anytree)''' + +from .treebase import NodeCorrespondence, compile_tree_factory +from .treecopy import copy_tree +from .treeviz import treestr \ No newline at end of file diff --git a/polymerist/genutils/treetools/treeviz.py b/polymerist/genutils/treetools/treeviz.py new file mode 100644 index 0000000..45eb78a --- /dev/null +++ b/polymerist/genutils/treetools/treeviz.py @@ -0,0 +1,26 @@ +'''Wrappers for printing out tree-like data structures''' + +from typing import Callable, Iterable, TypeVar, Union + +from anytree import Node +from anytree.render import RenderTree +from anytree.render import AbstractStyle, AsciiStyle, ContStyle, ContRoundStyle, DoubleStyle + +RENDER_STYLE_MAP = { + StyleType.__name__ : StyleType + for StyleType in AbstractStyle.__subclasses__() +} + + +def treestr(root : Node, style : Union[str, AbstractStyle]=ContStyle(), childiter : Callable[[tuple[Node]], Iterable[Node]]=list, maxlevel : int=None) -> str: + '''Return a printable string representation of a tree from a root Node, reminiscent of GNU tree''' + if isinstance(style, str): + StyleType = RENDER_STYLE_MAP[style] # will raise KeyError if undefined style is provided + style = StyleType() + + return RenderTree( + node=root, + style=style, + childiter=childiter, + maxlevel=maxlevel, + ) \ No newline at end of file From 7d012c23a4fd419513acc9c7b7b91bfedc46a531 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:15:16 -0700 Subject: [PATCH 036/191] Added registration of common aliases for tree print styles --- polymerist/genutils/treetools/treeviz.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/treetools/treeviz.py b/polymerist/genutils/treetools/treeviz.py index 45eb78a..f945114 100644 --- a/polymerist/genutils/treetools/treeviz.py +++ b/polymerist/genutils/treetools/treeviz.py @@ -6,13 +6,20 @@ from anytree.render import RenderTree from anytree.render 
import AbstractStyle, AsciiStyle, ContStyle, ContRoundStyle, DoubleStyle -RENDER_STYLE_MAP = { - StyleType.__name__ : StyleType - for StyleType in AbstractStyle.__subclasses__() +_render_style_aliases : dict[AbstractStyle, list[str]] = { + AsciiStyle : ['ASCII', 'Ascii', 'ascii'], + ContStyle : ['Cont', 'cont'], + ContRoundStyle : ['Contround', 'contround', 'cont_round', 'cround', 'round'], + DoubleStyle : ['Doublestyle', 'Double', 'double', 'dub'], } +RENDER_STYLE_MAP : dict[str, AbstractStyle] = {} +for StyleType in AbstractStyle.__subclasses__(): + RENDER_STYLE_MAP[StyleType.__name__] = StyleType + for alias in _render_style_aliases[StyleType]: # register aliases for convenience + RENDER_STYLE_MAP[alias] = StyleType -def treestr(root : Node, style : Union[str, AbstractStyle]=ContStyle(), childiter : Callable[[tuple[Node]], Iterable[Node]]=list, maxlevel : int=None) -> str: +def treestr(root : Node, attr : str='name', style : Union[str, AbstractStyle]=ContStyle(), childiter : Callable[[tuple[Node]], Iterable[Node]]=list, maxlevel : int=None) -> str: '''Return a printable string representation of a tree from a root Node, reminiscent of GNU tree''' if isinstance(style, str): StyleType = RENDER_STYLE_MAP[style] # will raise KeyError if undefined style is provided @@ -23,4 +30,4 @@ def treestr(root : Node, style : Union[str, AbstractStyle]=ContStyle(), childite style=style, childiter=childiter, maxlevel=maxlevel, - ) \ No newline at end of file + ).by_attr(attr) \ No newline at end of file From de9b986a40ca1e9c2a2fe241c70087b9d152f4d0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:28:17 -0700 Subject: [PATCH 037/191] Provided Path-specific implementation of trees --- polymerist/genutils/fileutils/filetree.py | 35 +++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/fileutils/filetree.py b/polymerist/genutils/fileutils/filetree.py index e7d81b6..85620e5 100644 --- 
a/polymerist/genutils/fileutils/filetree.py +++ b/polymerist/genutils/fileutils/filetree.py @@ -1,12 +1,43 @@ '''Tools for manipulating files and directories in the file system''' -from typing import Callable, Optional +from typing import Iterable from pathlib import Path from subprocess import Popen +from ..treetools.treebase import NodeCorrespondence, compile_tree_factory +from ..treetools.treeviz import treestr +from ..decorators.functional import allow_string_paths -# Path filetree functions (act on file system and directories) + +# FILE TREES +class PathToNodeCorrespondence(NodeCorrespondence, FROMTYPE=Path): + '''Concrete implementation of how to produce filetrees from pathlib Paths''' + def name(self, path : Path) -> str: + return path.name + + def has_children(self, path : Path) -> bool: + return path.is_dir() + + def children(self, path) -> Iterable[Path]: + return path.iterdir() + +path_tree = file_tree = allow_string_paths( + compile_tree_factory( + PathToNodeCorrespondence(), + obj_attr_name='ppath' # NOTE: can't call this "path", as that clashes with an attribute of Node also called "Path" + ) +) +dir_tree = allow_string_paths( + compile_tree_factory( + PathToNodeCorrespondence(), + class_alias='directory', + obj_attr_name='dir', + exclude_mixin=lambda path : path.is_file() + ) +) + +# MODIFICATIONS TO DIRECTORIES ON DISC def startfile(path : Path) -> None: '''Replacement for os.startfile() functionality, since none natively exists in Linux''' Popen(['xdg-open', path]) From c9a68f0059cfc8a25bd0ff4e06f9405e78f0c004 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:47:39 -0700 Subject: [PATCH 038/191] Added dummy directory for testing filetree capability --- polymerist/tests/data/dummy_dir/bar.txt | 0 polymerist/tests/data/dummy_dir/foo.dat | 0 polymerist/tests/data/dummy_dir/subdir1/spam.dat | 0 polymerist/tests/data/dummy_dir/subdir1/subdir2/baz.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 
polymerist/tests/data/dummy_dir/bar.txt create mode 100644 polymerist/tests/data/dummy_dir/foo.dat create mode 100644 polymerist/tests/data/dummy_dir/subdir1/spam.dat create mode 100644 polymerist/tests/data/dummy_dir/subdir1/subdir2/baz.txt diff --git a/polymerist/tests/data/dummy_dir/bar.txt b/polymerist/tests/data/dummy_dir/bar.txt new file mode 100644 index 0000000..e69de29 diff --git a/polymerist/tests/data/dummy_dir/foo.dat b/polymerist/tests/data/dummy_dir/foo.dat new file mode 100644 index 0000000..e69de29 diff --git a/polymerist/tests/data/dummy_dir/subdir1/spam.dat b/polymerist/tests/data/dummy_dir/subdir1/spam.dat new file mode 100644 index 0000000..e69de29 diff --git a/polymerist/tests/data/dummy_dir/subdir1/subdir2/baz.txt b/polymerist/tests/data/dummy_dir/subdir1/subdir2/baz.txt new file mode 100644 index 0000000..e69de29 From fed1b37c5640d6a53d980f7a8c6dfc641f54970a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:48:22 -0700 Subject: [PATCH 039/191] Modified pyproject.toml to ship subdirectories within tests.data (not just toplevel files) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f645349..f1acf50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ polymerist = [ # "look_and_say.dat" # ] "polymerist.tests.data" = [ - "*.*", # include any data files shipped in tests.data + "**/*.*", # include any data files shipped in tests.data ] [tool.versioningit] From 3b239452dc3f20426b48093d5714bb6d20dcd6bf Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 16:59:40 -0700 Subject: [PATCH 040/191] Added unit tests module for filetree --- .../tests/genutils/fileutils/test_filetree.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 polymerist/tests/genutils/fileutils/test_filetree.py diff --git a/polymerist/tests/genutils/fileutils/test_filetree.py b/polymerist/tests/genutils/fileutils/test_filetree.py new 
file mode 100644 index 0000000..e0f0a98 --- /dev/null +++ b/polymerist/tests/genutils/fileutils/test_filetree.py @@ -0,0 +1,13 @@ +'''Unit tests for filetree operations''' + +import pytest +from pathlib import Path + +from polymerist.genutils.pkginspect import get_dir_path_within_package +from polymerist.tests import data as testdata + + +@pytest.fixture +def testdir() -> Path: + return get_dir_path_within_package('dummy_dir', testdata) + \ No newline at end of file From 5860d77999aa6e8eb90ec91b58f635993e421b43 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 17:05:44 -0700 Subject: [PATCH 041/191] Deprecated OpenMM dependencies for duration and moved it within genutils --- polymerist/{ => genutils}/duration.py | 36 +++++++++++---------------- 1 file changed, 14 insertions(+), 22 deletions(-) rename polymerist/{ => genutils}/duration.py (79%) diff --git a/polymerist/duration.py b/polymerist/genutils/duration.py similarity index 79% rename from polymerist/duration.py rename to polymerist/genutils/duration.py index e29c670..82d1ea5 100644 --- a/polymerist/duration.py +++ b/polymerist/genutils/duration.py @@ -6,36 +6,28 @@ from time import time from string import Template from datetime import timedelta -from openmm.unit import Quantity, microsecond, millisecond, second, minute, hour, day, year # TODO : remove dependence on OpenMM for Unit support -from .genutils.typetools.categorical import _union_member_factory +from .typetools.categorical import _union_member_factory # TIME CONVERSION CONSTANTS -SECONDS_PER_INTERVAL = { # lookup of conversion factors to seconds - unit.get_name() : unit.conversion_factor_to(second) - for unit in (year, day, hour, minute, second, millisecond, microsecond) +SECONDS_PER_INTERVAL = { # hard-coded version which omits dependency on OpenMM/other unit engine + 'year' : 31_557_600.0, + 'day' : 86_400.0, + 'hour' : 3_600.0, + 'minute' : 60.0, + 'second' : 1.0, + 'millisecond' : 1E-3, + 'microsecond' : 1E-6, } 
-SECONDS_PER_INTERVAL['year'] = SECONDS_PER_INTERVAL.pop('julian year') # rename year from OpenMM default for clarity - SECONDS_PER_INTERVAL_ORDERED = { # arrange in descending order by magnitude of conversion factor unit_name : factor for unit_name, factor in sorted(SECONDS_PER_INTERVAL.items(), key=lambda x : x[1], reverse=True) } -# SECONDS_PER_INTERVAL_ORDERED = { # hard-coded version which omits dependency on OpenMM/other unit engine -# 'year' : 31_557_600.0, -# 'day' : 86_400.0, -# 'hour' : 3_600.0, -# 'minute' : 60.0, -# 'second' : 1.0, -# 'millisecond' : 1E-3, -# 'microsecond' : 1E-6, -# } - # TYPING AND CONVERSION -Timeable : TypeAlias = Union[int, float, timedelta, Quantity] +Timeable : TypeAlias = Union[int, float, timedelta] istimeable = _union_member_factory(Timeable, 'Timeable') def _convert_interval_to_seconds(interval : Timeable) -> float: @@ -46,10 +38,10 @@ def _convert_interval_to_seconds(interval : Timeable) -> float: return float(interval) elif isinstance(interval, timedelta): return interval.total_seconds() - elif isinstance(interval, Quantity): - if not interval.unit.is_compatible(second): - raise ValueError('Quantity must have units dimensions of time to be interpreted as an interval') - return interval.in_units_of(second)._value + # elif isinstance(interval, Quantity): # deprecated to avoid OpenMM requirement; may reintroduce standard unit engine for polymerist has been decided + # if not interval.unit.is_compatible(second): + # raise ValueError('Quantity must have units dimensions of time to be interpreted as an interval') + # return interval.in_units_of(second)._value else: raise TypeError(f'Unsupported type "{interval.__class__.__name__}" for time interval breakdown') From 6de61bd3ebd55e8c0028f59b23a2142fe87a4e88 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 17:37:24 -0700 Subject: [PATCH 042/191] Wrote unit tests for genutils.fileutils.filetree --- .../tests/genutils/fileutils/test_filetree.py | 50 
++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/polymerist/tests/genutils/fileutils/test_filetree.py b/polymerist/tests/genutils/fileutils/test_filetree.py index e0f0a98..7044e97 100644 --- a/polymerist/tests/genutils/fileutils/test_filetree.py +++ b/polymerist/tests/genutils/fileutils/test_filetree.py @@ -2,12 +2,58 @@ import pytest from pathlib import Path +from anytree.iterators import PreOrderIter, LevelOrderGroupIter from polymerist.genutils.pkginspect import get_dir_path_within_package from polymerist.tests import data as testdata +from polymerist.genutils.fileutils.filetree import path_tree, dir_tree + + +DUMMY_DIR_INFO : dict[int, bool] = { # expected depth and dir status of test directory + 'dummy_dir' : (0, True), + 'subdir1' : (1, True), + 'bar.txt' : (1, False), + 'foo.dat' : (1, False), + 'subdir2' : (2, True), + 'spam.dat' : (2, False), + 'baz.txt' : (3, False), +} @pytest.fixture -def testdir() -> Path: +def dummy_dir_path() -> Path: return get_dir_path_within_package('dummy_dir', testdata) - \ No newline at end of file + +@pytest.mark.parametrize('depth', [0, 1, 2, 3, 4]) +def test_path_tree_depth(dummy_dir_path, depth : int) -> None: + '''Test that max_depth restrictions are correctly applied''' + tree = path_tree(dummy_dir_path, max_depth=depth) + assert tree.height <= depth + +@pytest.mark.parametrize('depth', [0, 1, 2, 3, 4]) +def test_path_tree_output(dummy_dir_path, depth : int) -> None: + '''Test that file names and path/dir status is correctly recorded''' + tree = path_tree(dummy_dir_path, max_depth=depth) + for node_group in LevelOrderGroupIter(tree): + for node in node_group: + expected = DUMMY_DIR_INFO[node.name] + encountered = (node.depth, node.ppath.is_dir()) + assert(expected == encountered) + +def test_path_tree_exclude(dummy_dir_path) -> None: + '''Check that path_tree correctly excludes paths by filter conditions''' + tree = path_tree(dummy_dir_path, max_depth=None, exclude=lambda path : 
path.suffix == '.dat') + for node in PreOrderIter(tree): + assert node.ppath.suffix != '.dat' + +def test_dir_tree(dummy_dir_path) -> None: + '''Test that dir_tree correctly filters down to just directories''' + tree = dir_tree(dummy_dir_path, max_depth=None) + for node in PreOrderIter(tree): + assert node.dir.is_dir() # note the necessary change of attribute from "ppath" to "dir" + +def test_dir_tree_exclude(dummy_dir_path) -> None: + '''Test that dir_tree correctly excludes directories by filter conditions''' + tree = dir_tree(dummy_dir_path, max_depth=None, exclude=lambda dir : dir.name.startswith('subdir')) + for node in PreOrderIter(tree): + assert not node.name.startswith('subdir') # note the necessary change of attribute from "ppath" to "dir" \ No newline at end of file From 25197b2965987049133be8da7d6b133d4e358f3d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 17:48:49 -0700 Subject: [PATCH 043/191] Grouped all dynamic import-related modules into dedicated genutils subpackage --- polymerist/genutils/importutils/__init__.py | 3 +++ polymerist/genutils/{ => importutils}/pkginspect.py | 0 polymerist/genutils/{importutils.py => importutils/pkgiter.py} | 2 +- polymerist/genutils/{ => importutils}/pyimports.py | 0 4 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 polymerist/genutils/importutils/__init__.py rename polymerist/genutils/{ => importutils}/pkginspect.py (100%) rename polymerist/genutils/{importutils.py => importutils/pkgiter.py} (98%) rename polymerist/genutils/{ => importutils}/pyimports.py (100%) diff --git a/polymerist/genutils/importutils/__init__.py b/polymerist/genutils/importutils/__init__.py new file mode 100644 index 0000000..067b687 --- /dev/null +++ b/polymerist/genutils/importutils/__init__.py @@ -0,0 +1,3 @@ +'''Functionality for dynamically importing and inspecting Python modules and packages''' + +from .pkgiter import module_hierarchy, iter_submodules, submodule_loggers \ No newline at end of file 
diff --git a/polymerist/genutils/pkginspect.py b/polymerist/genutils/importutils/pkginspect.py similarity index 100% rename from polymerist/genutils/pkginspect.py rename to polymerist/genutils/importutils/pkginspect.py diff --git a/polymerist/genutils/importutils.py b/polymerist/genutils/importutils/pkgiter.py similarity index 98% rename from polymerist/genutils/importutils.py rename to polymerist/genutils/importutils/pkgiter.py index 0ac3642..6ce5ab4 100644 --- a/polymerist/genutils/importutils.py +++ b/polymerist/genutils/importutils/pkgiter.py @@ -1,4 +1,4 @@ -'''Utilities for automating submodule import and logger creation''' +'''Tools for iterating over and extracting information from Python package hierarchies''' import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/genutils/pyimports.py b/polymerist/genutils/importutils/pyimports.py similarity index 100% rename from polymerist/genutils/pyimports.py rename to polymerist/genutils/importutils/pyimports.py From 73087386d1a4e643563b5793293eeec0cd027191 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 18:01:26 -0700 Subject: [PATCH 044/191] Updated docstrings and relative imports --- polymerist/genutils/fileutils/filetree.py | 2 +- polymerist/genutils/importutils/pyimports.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/fileutils/filetree.py b/polymerist/genutils/fileutils/filetree.py index 85620e5..eff2c2f 100644 --- a/polymerist/genutils/fileutils/filetree.py +++ b/polymerist/genutils/fileutils/filetree.py @@ -12,7 +12,7 @@ # FILE TREES class PathToNodeCorrespondence(NodeCorrespondence, FROMTYPE=Path): - '''Concrete implementation of how to produce filetrees from pathlib Paths''' + '''Concrete implementation of pathlib Paths as nodes in a tree''' def name(self, path : Path) -> str: return path.name diff --git a/polymerist/genutils/importutils/pyimports.py b/polymerist/genutils/importutils/pyimports.py index b7d2357..dc3b05e 
100644 --- a/polymerist/genutils/importutils/pyimports.py +++ b/polymerist/genutils/importutils/pyimports.py @@ -7,7 +7,7 @@ import ast from pathlib import Path -from .decorators.functional import allow_string_paths # TODO: see if there's anyway to bypass a relative import here +from ..decorators.functional import allow_string_paths # TODO: see if there's anyway to bypass a relative import here @dataclass From 4b5061ffc607c293317baa5a431d4c1b39d39a8a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 20:56:27 -0700 Subject: [PATCH 045/191] Updated pkginspect imports following move into importutils --- polymerist/tests/genutils/fileutils/test_filetree.py | 2 +- .../genutils/sequences/discernment/test_discernment.py | 2 +- polymerist/tests/genutils/test_pkginspect.py | 9 +++++---- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/polymerist/tests/genutils/fileutils/test_filetree.py b/polymerist/tests/genutils/fileutils/test_filetree.py index 7044e97..d220959 100644 --- a/polymerist/tests/genutils/fileutils/test_filetree.py +++ b/polymerist/tests/genutils/fileutils/test_filetree.py @@ -4,7 +4,7 @@ from pathlib import Path from anytree.iterators import PreOrderIter, LevelOrderGroupIter -from polymerist.genutils.pkginspect import get_dir_path_within_package +from polymerist.genutils.importutils.pkginspect import get_dir_path_within_package from polymerist.tests import data as testdata from polymerist.genutils.fileutils.filetree import path_tree, dir_tree diff --git a/polymerist/tests/genutils/sequences/discernment/test_discernment.py b/polymerist/tests/genutils/sequences/discernment/test_discernment.py index 3e9d347..728a362 100644 --- a/polymerist/tests/genutils/sequences/discernment/test_discernment.py +++ b/polymerist/tests/genutils/sequences/discernment/test_discernment.py @@ -1,7 +1,7 @@ '''Unit tests for DISCERNMENT-related functionality''' import pytest -from polymerist.genutils.pkginspect import get_file_path_within_package +from 
polymerist.genutils.importutils.pkginspect import get_file_path_within_package from polymerist.tests import data as testdata import json diff --git a/polymerist/tests/genutils/test_pkginspect.py b/polymerist/tests/genutils/test_pkginspect.py index a8edbd9..6a6e576 100644 --- a/polymerist/tests/genutils/test_pkginspect.py +++ b/polymerist/tests/genutils/test_pkginspect.py @@ -8,7 +8,8 @@ from polymerist import polymerist # this is a dummy toplevel module, and NOt the entire polymerist package from polymerist import genutils -from polymerist.genutils import pkginspect +from polymerist.genutils import importutils +from polymerist.genutils.importutils import pkginspect from polymerist import tests @@ -79,7 +80,7 @@ def test_is_module_fail_on_invalid_types(non_module_type : type) -> None: ('data', tests), ('data/sample.dat', tests), pytest.param('daata/simple.dat', tests, marks=pytest.mark.xfail(raises=ValueError, reason="This isn't a real file", strict=True)), - ('pkginspect.py', genutils), + ('pkginspect.py', importutils), pytest.param('fake/whatever.txt', pkginspect, marks=pytest.mark.xfail(raises=TypeError, reason="Module is not a package and therefore cannot contain resources", strict=True)), ] ) @@ -94,7 +95,7 @@ def test_get_resource_path(rel_path : str, module : ModuleType) -> None: pytest.param('data', tests, marks=pytest.mark.xfail(raises=FileNotFoundError, reason="This is a directory, NOT a file", strict=True)), ('data/sample.dat', tests), pytest.param('daata/simple.dat', tests, marks=pytest.mark.xfail(raises=ValueError, reason="This isn't a real file", strict=True)), - ('pkginspect.py', genutils), + ('pkginspect.py', importutils), pytest.param('fake/whatever.txt', pkginspect, marks=pytest.mark.xfail(raises=TypeError, reason="Module is not a package and therefore cannot contain resources", strict=True)), ] ) @@ -109,7 +110,7 @@ def test_get_file_path(rel_path : str, module : ModuleType) -> None: ('data', tests), pytest.param('data/sample.dat', tests, 
marks=pytest.mark.xfail(raises=NotADirectoryError, reason='This IS a real file, but not a directory', strict=True)), pytest.param('daata/simple.dat', tests, marks=pytest.mark.xfail(raises=ValueError, reason="This isn't a real file", strict=True)), - pytest.param('pkginspect.py', genutils, marks=pytest.mark.xfail(raises=NotADirectoryError, reason='This IS a real file, but not a directory', strict=True)), + pytest.param('pkginspect.py', importutils, marks=pytest.mark.xfail(raises=NotADirectoryError, reason='This IS a real file, but not a directory', strict=True)), pytest.param('fake/whatever.txt', pkginspect, marks=pytest.mark.xfail(raises=TypeError, reason="Module is not a package and therefore cannot contain resources", strict=True)), ] ) From 5916a47c7c374022dcfa4a5d431d98890508909a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 20:56:47 -0700 Subject: [PATCH 046/191] Added module name info extraction functions --- polymerist/genutils/importutils/pkginspect.py | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/polymerist/genutils/importutils/pkginspect.py b/polymerist/genutils/importutils/pkginspect.py index a160ff0..f05bff8 100644 --- a/polymerist/genutils/importutils/pkginspect.py +++ b/polymerist/genutils/importutils/pkginspect.py @@ -1,6 +1,7 @@ '''For checking whether object are valid Python modules and packages, and if so for gathering info from within them''' -from typing import Union +from typing import Optional, Union +from types import ModuleType from pathlib import Path from importlib.resources import ( @@ -10,6 +11,7 @@ from importlib.resources._common import get_package, from_package, resolve +# CHECKING PACKAGE AND MODULE STATUS def is_module(module : Package) -> bool: '''Determine whether a given Package-like (i.e. str or ModuleType) is a valid Python module This will return True for packages, bottom-level modules (i.e. 
*.py) and Python scripts''' @@ -27,7 +29,45 @@ def is_package(package : Package) -> bool: except (ModuleNotFoundError, TypeError): return False +# EXTRACTING MODULE NAMING INFO +def flexible_module_pass(module : Union[str, Path, ModuleType]) -> ModuleType: # TODO: extend this to decorator + '''Flexible interface for supplying a ModuleType object as an argument + Allows for passing a name (either module name or string path), Path location, or a module proper''' + if isinstance(module, (str, ModuleType)): + return resolve(module) + elif isinstance(module, Path): + raise NotImplementedError + else: + raise TypeError(f'Cannot interpret object of type "{type(module).__name__}" as a module') +# TODO : find way to get depth of submodule in toplevel ("number of dots" before standalone name) +def module_parts(module : Union[str, ModuleType]) -> tuple[Optional[str], str]: + '''Takes a module (as its name or as ModuleType) and returns its parent package name and relative module name''' + module = resolve(module) + module_name = module.__spec__.name + parent_package_name, _, module_stem = module_name.rpartition('.') # split on rightmost dot separator + if not parent_package_name: + parent_package_name = None + + return parent_package_name, module_stem + +def module_stem(module : Union[str, ModuleType]) -> tuple[Optional[str], str]: + '''Takes a module (as its name or as ModuleType) and returns its relative module name''' + return module_parts(module)[-1] + +def relative_module_name(module : ModuleType, relative_to : Optional[ModuleType]=None, remove_leading_dot : bool=True) -> str: + '''Gets the name of a module relative to another (presumably toplevel) module + If the given module is not in the path of the toplevel module, will simply return as module.__name__''' + rel_mod_name = module.__spec__.name + if relative_to is not None: + toplevel_prefix = relative_to.__spec__.name + if remove_leading_dot: + toplevel_prefix += '.' 
# append dot to prefix to remove it later + rel_mod_name = rel_mod_name.removeprefix(toplevel_prefix) + + return rel_mod_name + +# FETCHING RESOURCES FROM PATHS WITHIN PACKAGES def get_resource_path_within_package(relative_path : Union[str, Path], package : Package) -> Path: '''Get the Path to a resource (i.e. either a directory or a file) which lives within a Python package''' package_path : Path = get_package_path(package) # will also implicitly check that the provided package exists as a module From b9b91695313ecbef81c8cb24e6cb4e118016d4c1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 20:57:05 -0700 Subject: [PATCH 047/191] Completely reimplemented module hierarchies in terms of trees --- polymerist/genutils/importutils/pkgiter.py | 228 ++++++++++++++------- 1 file changed, 151 insertions(+), 77 deletions(-) diff --git a/polymerist/genutils/importutils/pkgiter.py b/polymerist/genutils/importutils/pkgiter.py index 6ce5ab4..b3fc63a 100644 --- a/polymerist/genutils/importutils/pkgiter.py +++ b/polymerist/genutils/importutils/pkgiter.py @@ -4,94 +4,168 @@ LOGGER = logging.getLogger(__name__) from types import ModuleType -from typing import Generator, Iterable, Optional +from typing import Generator, Iterable, Optional, Union -import pkgutil -import importlib +from importlib import import_module +from pkgutil import iter_modules -from enum import StrEnum -from itertools import chain +from anytree.node import Node +from anytree.render import AbstractStyle, ContStyle +from anytree.iterators import PreOrderIter +from ..treetools.treebase import NodeCorrespondence, compile_tree_factory +from ..treetools.treeviz import treestr +from .pkginspect import module_stem, is_package -# FILETREE CHARACTERS -_TREE_WIDTH : int = 4 -assert(_TREE_WIDTH > 0) -class TreeChars(StrEnum): - '''Box characters for representing connections between components of a hierarchcal structure''' - SPACE = ' '*_TREE_WIDTH - DASH = '\u2500'*_TREE_WIDTH - PIPE = '\u2502' + ' 
'*(_TREE_WIDTH - 1) - BRANCH = '\u251C' + '\u2500'*(_TREE_WIDTH - 1) - ELBOW = '\u2514' + '\u2500'*(_TREE_WIDTH - 1) - ELBOW_R = '\u2514' + '\u2500'*(_TREE_WIDTH - 1) # direction-agnostic alias - ELBOW_L = '\u2500'*(_TREE_WIDTH - 1) + '\u2518' - -# BASE SUBMODULE GENERATORS -def module_by_pkg_str(pkg_str : str) -> ModuleType: - '''Load module from dot-separated module name string (intended to be called on __package__ attr)''' - return importlib.import_module(pkg_str) - -def iter_submodule_info(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> Generator[tuple[ModuleType, str, bool], None, None]: - '''Generate all submodules of a given module, yielding a tuple of (the module, the module name, and whether the module is also a package). - If the "recursive" flag is set, will generate ALL possible submodules in the tree recursively''' +# HIERARCHICAL MODULE TREE GENERATION +class ModuleToNodeCorrespondence(NodeCorrespondence, FROMTYPE=ModuleType): + '''Concrete implementation of Python modules and packages as nodes in a tree''' + def name(self, module : ModuleType) -> str: + return module_stem(module) # TODO: find Python package-spec compliant way of extracting this easily + + def has_children(self, module : ModuleType) -> bool: + return is_package(module) + + def children(self, module : ModuleType) -> Iterable[ModuleType]: + for _loader, module_name, ispkg in iter_modules(module.__path__, prefix=module.__name__+'.'): + try: + submodule = import_module(module_name) + yield submodule + except ModuleNotFoundError: + continue + +module_tree = compile_tree_factory( + ModuleToNodeCorrespondence(), + class_alias='package', + obj_attr_name='module', + exclude_mixin=lambda module : module_stem(module).startswith('_'), +) + +# BACKWARDS-COMPATIBLE PORTS OF LEGACY IMPORTUTILS FUNCTIONS +def module_tree_direct(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> Node: + '''Produce a tree from the Python 
package hierarchy starting with a given module + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + + Returns + ------- + modtree : Node + The root node of the module tree, corresponding to the module object passed to "module" + ''' if blacklist is None: blacklist = [] - - for _loader, submodule_name, submodule_ispkg in pkgutil.iter_modules(module.__path__): - if submodule_name in blacklist: - continue # skip over import blacklisted modules - - try: - submodule = importlib.import_module(f'{module.__package__}.{submodule_name}') - except ModuleNotFoundError: - continue - yield (submodule, submodule_name, submodule_ispkg) - if submodule_ispkg and recursive: - yield from iter_submodule_info(submodule, recursive=True, blacklist=blacklist) + return module_tree( + module, + max_depth=None if recursive else 1, + exclude=lambda module : module_stem(module) in blacklist, + ) def iter_submodules(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> Generator[ModuleType, None, None]: - '''Generate all submodules of a given module. 
If the "recursive" flag is set, will generate ALL possible submodules in the tree recursively''' - for submodule, *_submodule_info in iter_submodule_info(module, recursive=recursive, blacklist=blacklist): - yield submodule # only yield submodule; for more compact iteration + ''' + Generates all modules which can be imported from the given toplevel module + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + + Returns + ------- + submodules : Generator[ModuleType] + A generator which yields modules in traversal pre-order as they appear wihin the package hierarchy + ''' + modtree = module_tree_direct(module, recursive=recursive, blacklist=blacklist) + for module_node in PreOrderIter(modtree): + yield module_node.module +def iter_submodule_info(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> Generator[tuple[ModuleType, str, bool], None, None]: + ''' + Generates information about all modules which can be imported from the given toplevel module + Namely, yields the module object, module name, and whether or not the module is a package + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + + Returns + ------- + submodule_info : Generator[ModuleType, str, 
bool] + A generator which yields modules info in traversal pre-order as they appear wihin the package hierarchy + yields 3-tuples containing ModuleType objects, module names, and whether the current module is also a subpackage + ''' + modtree = module_tree_direct(module, recursive=recursive, blacklist=blacklist) + for module_node in PreOrderIter(modtree): + yield module_node.module, module_node.name, module_node.is_leaf -# TOOLS FOR REGISTERING AND PULLING INFO FROM SUBMODULES def register_submodules(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> None: - '''Registers submodules of a given module into it's own namespace (i.e. autoimports submodules)''' - for (submodule, submodule_name, submodule_ispkg) in iter_submodule_info(module, recursive=False, blacklist=blacklist): # initially only iterate on one level to keep track of parent module - setattr(module, submodule_name, submodule) - if submodule_ispkg and recursive: - register_submodules(submodule, recursive=recursive, blacklist=blacklist) - -def _module_hierarchy(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None, _prefix : str='') -> Generator[str, None, None]: - '''Returns an iterable of level strings representing a module hierarchy, with each level of nesting indicated by a series of pipes and dashes''' - end_sentinel = (object(), object(), object()) # used to unambiguously check whether the initial generator is in fact empty - module_iter = chain(iter_submodule_info(module, recursive=False, blacklist=blacklist), [end_sentinel]) - - module_info = next(module_iter) # primer for hierarchy iteration, need to first check - reached_end = bool(module_info == end_sentinel) # if module hierarchy is empty, don't bother trying to iterate - while not reached_end: - (submodule, submodule_name, submodule_ispkg) = module_info # unpacking to keep current module_info values in namespace (and for convenience) - module_info = next(module_iter) # peek 
ahead to check if the current module is in fact the last at this layer - if module_info == end_sentinel: - splitter, extension = TreeChars.ELBOW, TreeChars.SPACE - reached_end = True - else: - splitter, extension = TreeChars.BRANCH, TreeChars.PIPE - - yield _prefix + splitter + submodule_name - if submodule_ispkg and recursive: - yield from _module_hierarchy(submodule, recursive=recursive, blacklist=blacklist, _prefix=_prefix + extension) # retrieve partial output - -def module_hierarchy(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> str: - return '\n'.join(_module_hierarchy(module, recursive=recursive, blacklist=blacklist)) - -def submodule_loggers(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None) -> dict[str, Optional[logging.Logger]]: - '''Produce a dict of any Logger objects present in each submodule. Can optionally generate recursively and blacklist certain modules''' - return { - submodule_name : getattr(submodule, 'LOGGER', None) # default to None rather than raising Exception - for (submodule, submodule_name, submodule_ispkg) in iter_submodule_info(module, recursive=recursive, blacklist=blacklist) - } \ No newline at end of file + ''' + Registers all submodules of a given module into it's own namespace (i.e. 
autoimports submodules) + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + + Returns + ------- + None + ''' + for submodule in iter_submodules(module, recursive=recursive, blacklist=blacklist): + setattr(module, submodule.__name__, submodule) + +def module_hierarchy(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None, style : Union[str, AbstractStyle]=ContStyle()) -> str: + ''' + Generates a printable string which summarizes a Python packages hierarchy. Reminiscent of GNU tree output + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + style : str or AbstractStyle + An element drawing style for the final tree structure printout + + Returns + ------- + module_summary : str + Printable string which displays the package structure + ''' + modtree = module_tree_direct(module, recursive=recursive, blacklist=blacklist) + return treestr(modtree, style=style) + From 7f21c3df386790ffb1b70af021d6215e9c8f5c06 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 20:57:25 -0700 Subject: [PATCH 048/191] Moved submodule_loggers from importutils into logutils --- polymerist/genutils/logutils/IOHandlers.py | 41 ++++++++++++++++++++-- 1 file changed, 39 
insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/logutils/IOHandlers.py b/polymerist/genutils/logutils/IOHandlers.py index c5cbff2..1d83dd8 100644 --- a/polymerist/genutils/logutils/IOHandlers.py +++ b/polymerist/genutils/logutils/IOHandlers.py @@ -4,12 +4,15 @@ from logging import Logger from traceback import format_exception -from pathlib import Path +from types import ModuleType from typing import Iterable, Optional, Union -from .timestamps import Timestamp, TIMESTAMP_LOG +from pathlib import Path from datetime import datetime +from .timestamps import Timestamp, TIMESTAMP_LOG +from ..importutils.pkgiter import iter_submodules + # DATE AND TIME FORMATTING LOG_FORMATTER = logging.Formatter('%(asctime)s.%(msecs)03d [%(levelname)-8s:%(module)16s:line %(lineno)-4d] - %(message)s', datefmt=TIMESTAMP_LOG) # TODO : make this more generic @@ -25,6 +28,40 @@ def get_active_loggers() -> list[Logger]: if isinstance(possible_logger, Logger) # omits PlaceHolder objects ] +def submodule_loggers(module : ModuleType, recursive : bool=True, blacklist : Optional[Iterable[str]]=None, sparse : bool=True) -> dict[str, Optional[logging.Logger]]: + ''' + Produce a dict of any Logger objects present in each submodule. Can optionally generate recursively and blacklist certain modules + + Parameters + ---------- + module : ModuleType + The "root" module to begin importing from + Represented in the Node object returned by this function + recursive : bool, default=True + Whether or not to recursively import modules from subpackages and add them to the tree + blacklist : list[str] (optional), default None + List of module names to exclude from tree building + If provided, will exclude any modules whose names occur in this list + sparse : bool, default=True + Whether to only include modules which have a Logger defined (i.e. 
exclude all NoneType entries from returned dict) + + Returns + ------- + logger_registry : dict[str, Optional[logging.Logger]] + A dict keyed by module name whose values are the corresponding Logger bound to that module + ''' + logger_registry = {} + for module in iter_submodules(module, recursive=recursive, blacklist=blacklist): + full_module_name = module.__name__ + module_logger = logging.root.manager.loggerDict.get(full_module_name, None) + if isinstance(module, logging.PlaceHolder): # exclude dummy Placeholder loggers + module_logger = None + + if not (sparse and module_logger is None): + logger_registry[full_module_name] = module_logger + + return logger_registry + # FILE-STREAM HANDLING CLASSES class MultiStreamFileHandler(logging.FileHandler): '''Class to simplify logging file I/O given multiple logger sources providing logging input From 99194907d0f0bc5228e9d7c852325e928a0ea5e8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 20:57:43 -0700 Subject: [PATCH 049/191] Registered core functionality at importutils subpackage level --- polymerist/genutils/importutils/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/polymerist/genutils/importutils/__init__.py b/polymerist/genutils/importutils/__init__.py index 067b687..1486139 100644 --- a/polymerist/genutils/importutils/__init__.py +++ b/polymerist/genutils/importutils/__init__.py @@ -1,3 +1,4 @@ '''Functionality for dynamically importing and inspecting Python modules and packages''' -from .pkgiter import module_hierarchy, iter_submodules, submodule_loggers \ No newline at end of file +from .pkgiter import module_hierarchy, iter_submodules +from .pkginspect import is_package, is_module \ No newline at end of file From 355331ba096061f7aaf902a106ea622931196d27 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 13 Nov 2024 21:00:37 -0700 Subject: [PATCH 050/191] Moved tree testing into tests module proper --- polymerist/{ => 
tests}/genutils/treetools/test_trees.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename polymerist/{ => tests}/genutils/treetools/test_trees.py (100%) diff --git a/polymerist/genutils/treetools/test_trees.py b/polymerist/tests/genutils/treetools/test_trees.py similarity index 100% rename from polymerist/genutils/treetools/test_trees.py rename to polymerist/tests/genutils/treetools/test_trees.py From fa461f8073c7ba10ce4c89c32d89d5337b5063f9 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 13:09:23 -0700 Subject: [PATCH 051/191] Separated node attr extraction from copying, enhanced docstrings --- polymerist/genutils/treetools/treecopy.py | 87 ++++++++++++++++++++--- 1 file changed, 77 insertions(+), 10 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index eb4ed9f..c9061f4 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -1,6 +1,6 @@ '''Tools for copying parts and wholes of trees, at various levels of resolution''' -from typing import Optional +from typing import Any, Optional from anytree.node import Node from anytree.exporter import DictExporter @@ -8,24 +8,91 @@ from ..filters import Filter, ALWAYS_TRUE_FILTER, ALWAYS_FALSE_FILTER -def copy_node_attrs(node : Node, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: - '''Create a copy of a Node with only its attributes and no set ancestors or descendents''' +def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None) -> dict[str, Any]: + ''' + Return a dict of all attributes set on a Node + + Parameters + ---------- + node : Node + An anytree.node.Node object + attr_filter : Filter[str] (optional), default lambda x : True + An optional criterion to decide whether an attribute should be kept + Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False 
will exclude it + If None, will default to always True (i.e. no attributes will be screened out) + + Returns + ------- + node_attrs : dict[str, Any] + A dictionary keyed by attribute name whose values are the values set for repective Node attributes + ''' + if attr_filter is None: + attr_filter = ALWAYS_TRUE_FILTER + node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children if attr_filter(attr_name) } # assert(node_copy.children == tuple()) - return Node(**node_attrs) + return node_attrs + +def copy_node_unbound(node : Node, attr_filter : Optional[Filter[str]]=None) -> Node: + ''' + Create a copy of a Node with matching attributes but no ancestors or descendents + + Parameters + ---------- + node : Node + An anytree.node.Node object + attr_filter : Filter[str] (optional), default lambda x : True + An optional criterion to decide whether an attribute should be kept + Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False will exclude it + If None, will default to always True (i.e. no attributes will be screened out) + + Returns + ------- + node_copy : Node + A new Node object which has the same attributes as + the original node but none of the ancestors or descendants + ''' + return Node(**get_node_attrs(node=node, attr_filter=attr_filter)) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree -def copy_tree(node : Node, stop : Filter[Node]=ALWAYS_FALSE_FILTER, attr_filter : Filter[str]=ALWAYS_TRUE_FILTER) -> Node: - '''Create a copy of an anytree Node hierarchy. 
Can provide filters and stop criteria to exclude nodes or whole branches''' - node_copy = copy_node_attrs(node, attr_filter=attr_filter) # make a read-only copy of JUST the current node's attributes - for child in node.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple +def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Optional[Filter[str]]=None) -> Node: + ''' + Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches + + Parameters + ---------- + root : Node + An anytree.node.Node object which is the root of a tree-like hierarchy + stop : Filter[Node] (optional), default None + An optional criterion to decide when to stop traversing the tree to be copied + Should be a function which accepts a single Node arg and returns a bool + Return value of True will exclude all subsequent nodes on a branch, while value of True will proceed with iteration and copying + If None, will default to always False (i.e. no extra stop conditions, full tree will be copied) + attr_filter : Filter[str] (optional), default lambda x : True + An optional criterion to decide whether an attribute should be kept + Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False will exclude it + If None, will default to always True (i.e. 
no attributes will be screened out) + + Returns + ------- + root_new : Node + The root Node of the copied tree structure + ''' + if stop is None: + stop = ALWAYS_FALSE_FILTER + + root_new = copy_node_unbound(root, attr_filter=attr_filter) + for child in root.children: # NOTE: this also works for leaf nodes, as their "children" attrs is just an empty tuple if stop(child): continue child_copy = copy_tree(child, stop=stop, attr_filter=attr_filter) # recursively copy children until stop criterion - child_copy.parent = node_copy + child_copy.parent = root_new - return node_copy \ No newline at end of file + return root_new \ No newline at end of file From 41d1fea68ad6908b25a7bdab58f25f6d0e7f975d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 13:33:51 -0700 Subject: [PATCH 052/191] Added option to omit "name" from get_node_attributes (useful for copying) --- polymerist/genutils/treetools/treecopy.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index c9061f4..fd99dee 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -2,13 +2,14 @@ from typing import Any, Optional +from functools import reduce from anytree.node import Node from anytree.exporter import DictExporter from ..filters import Filter, ALWAYS_TRUE_FILTER, ALWAYS_FALSE_FILTER -def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None) -> dict[str, Any]: +def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None, include_name : bool=False) -> dict[str, Any]: ''' Return a dict of all attributes set on a Node @@ -21,6 +22,10 @@ def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None) -> dic Should be a function which accepts a single string arg and returns a bool Return value of True will include an attribute in the return, while value or False will exclude it If None, will 
default to always True (i.e. no attributes will be screened out) + include_name : bool, deafult False + Whether to include the required "name" attribute of a Node in the returned dict + Useful to exclude when copying nodes to avoid redundancy + By default False (i.e. "name" will be excluded from the returned attributes) Returns ------- @@ -30,10 +35,15 @@ def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None) -> dic if attr_filter is None: attr_filter = ALWAYS_TRUE_FILTER + if not include_name: # NOTE: need to rebind to different name to avoid recursion error + _attr_filter = lambda attr_name : attr_filter(attr_name) and attr_name != 'name' + else: + _attr_filter = attr_filter + node_attrs = { attr_name : attr for attr_name, attr in DictExporter._iter_attr_values(node) # NOTE: this is necessary to omit mangled NodeMixin info about parents and children - if attr_filter(attr_name) + if _attr_filter(attr_name) } # assert(node_copy.children == tuple()) return node_attrs @@ -58,7 +68,7 @@ def copy_node_unbound(node : Node, attr_filter : Optional[Filter[str]]=None) -> A new Node object which has the same attributes as the original node but none of the ancestors or descendants ''' - return Node(**get_node_attrs(node=node, attr_filter=attr_filter)) + return Node(name=node.name, **get_node_attrs(node=node, attr_filter=attr_filter, include_name=False)) # NOTE: explicitly exclude a filter criterion here, as filtering (rather than stopping) may result in deleted nodes IN THE MIDDLE of a tree def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Optional[Filter[str]]=None) -> Node: From cffebdb0cda0e2be14e80601bb88e359a4369648 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:07:22 -0700 Subject: [PATCH 053/191] Added duoplicate-name node to example tree to strengthen robustness of tests --- polymerist/tests/genutils/treetools/test_trees.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/polymerist/tests/genutils/treetools/test_trees.py b/polymerist/tests/genutils/treetools/test_trees.py index 1eccda2..fb7454e 100644 --- a/polymerist/tests/genutils/treetools/test_trees.py +++ b/polymerist/tests/genutils/treetools/test_trees.py @@ -13,5 +13,6 @@ def example_tree_for_tests() -> Node: # TODO: move to separate tests module even g = Node('g', foo='gg', parent=root) i = Node('i', foo='ii', parent=g) h = Node('h', foo='hh', parent=i) + a_dup = Node('a', foo='a+a', parent=g) # testing how nodes with duplicate names are handled return root \ No newline at end of file From 6481c07284ef4e443bc51ada2da88e2379cc8096 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:07:34 -0700 Subject: [PATCH 054/191] Clarified stop criterion inclusivity in docstring --- polymerist/genutils/treetools/treecopy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index fd99dee..1424388 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -81,6 +81,9 @@ def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Opt An anytree.node.Node object which is the root of a tree-like hierarchy stop : Filter[Node] (optional), default None An optional criterion to decide when to stop traversing the tree to be copied + This criterion is not inclusive, i.e. a Node matching this criterion WILL be + included in the copied tree, but any child Nodes of this flagged node will not + Should be a function which accepts a single Node arg and returns a bool Return value of True will exclude all subsequent nodes on a branch, while value of True will proceed with iteration and copying If None, will default to always False (i.e. 
no extra stop conditions, full tree will be copied) From 6dc492ef44fe5eab2fbe17fe7d97e2b057e1707a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:19:55 -0700 Subject: [PATCH 055/191] Corrected inclusivity docstring to correctly label algorithm as inclusive in cut branches --- polymerist/genutils/treetools/treecopy.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 1424388..305461a 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -81,10 +81,11 @@ def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Opt An anytree.node.Node object which is the root of a tree-like hierarchy stop : Filter[Node] (optional), default None An optional criterion to decide when to stop traversing the tree to be copied - This criterion is not inclusive, i.e. a Node matching this criterion WILL be - included in the copied tree, but any child Nodes of this flagged node will not - Should be a function which accepts a single Node arg and returns a bool + + This criterion is inclusive, i.e. a Node matching this criterion will NOT be + included in the copied tree, nor will any of its children or their children, recursively + Return value of True will exclude all subsequent nodes on a branch, while value of True will proceed with iteration and copying If None, will default to always False (i.e. 
no extra stop conditions, full tree will be copied) attr_filter : Filter[str] (optional), default lambda x : True From 1b220e53d7c633d07f286a4e5b899c3ef86157e1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:40:31 -0700 Subject: [PATCH 056/191] Implemented anytree-to-networkx converter --- polymerist/genutils/treetools/treecopy.py | 66 +++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/treetools/treecopy.py index 305461a..09f6e1d 100644 --- a/polymerist/genutils/treetools/treecopy.py +++ b/polymerist/genutils/treetools/treecopy.py @@ -2,10 +2,12 @@ from typing import Any, Optional -from functools import reduce from anytree.node import Node +from anytree.iterators import PreOrderIter from anytree.exporter import DictExporter +from networkx import DiGraph + from ..filters import Filter, ALWAYS_TRUE_FILTER, ALWAYS_FALSE_FILTER @@ -20,6 +22,7 @@ def get_node_attrs(node : Node, attr_filter : Optional[Filter[str]]=None, includ attr_filter : Filter[str] (optional), default lambda x : True An optional criterion to decide whether an attribute should be kept Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False will exclude it If None, will default to always True (i.e. no attributes will be screened out) include_name : bool, deafult False @@ -59,6 +62,7 @@ def copy_node_unbound(node : Node, attr_filter : Optional[Filter[str]]=None) -> attr_filter : Filter[str] (optional), default lambda x : True An optional criterion to decide whether an attribute should be kept Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False will exclude it If None, will default to always True (i.e. 
no attributes will be screened out) @@ -75,7 +79,7 @@ def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Opt ''' Create a copy of an anytree Node hierarchy. Can provide filters and stop criteria to exclude nodes or whole branches - Parameters + Parameters ---------- root : Node An anytree.node.Node object which is the root of a tree-like hierarchy @@ -91,6 +95,7 @@ def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Opt attr_filter : Filter[str] (optional), default lambda x : True An optional criterion to decide whether an attribute should be kept Should be a function which accepts a single string arg and returns a bool + Return value of True will include an attribute in the return, while value or False will exclude it If None, will default to always True (i.e. no attributes will be screened out) @@ -109,4 +114,59 @@ def copy_tree(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Opt child_copy = copy_tree(child, stop=stop, attr_filter=attr_filter) # recursively copy children until stop criterion child_copy.parent = root_new - return root_new \ No newline at end of file + return root_new + +def tree_to_networkx(root : Node, stop : Optional[Filter[Node]]=None, attr_filter : Optional[Filter[str]]=None) -> Node: + ''' + Produces a networkx.DiGraph representation on an anytree Tree + + Parameters + ---------- + root : Node + An anytree.node.Node object which is the root of a tree-like hierarchy + stop : Filter[Node] (optional), default None + An optional criterion to decide when to stop traversing the tree to be copied + Should be a function which accepts a single Node arg and returns a bool + + Return value of True will exclude all subsequent nodes on a branch, while value of True will proceed with iteration and copying + If None, will default to always False (i.e. no extra stop conditions, full tree will be copied) + + This criterion is inclusive, i.e. 
a Node matching this criterion will NOT be + included in the copied tree, nor will any of its children or their children, recursively + attr_filter : Filter[str] (optional), default lambda x : True + An optional criterion to decide whether an attribute should be kept + Should be a function which accepts a single string arg and returns a bool + + Return value of True will include an attribute in the return, while value or False will exclude it + If None, will default to always True (i.e. no attributes will be screened out) + + Returns + ------- + nx_tree : diGraph + A networkx directed graph object + ''' + if stop is None: + stop = ALWAYS_FALSE_FILTER + + pruned : dict[Node, bool] = {} # which nodes should be excluded (needed for recursive branch pruning) + node_ids : dict[Node, int] = {} # unique integer index for nodes (needed to resolve duplicate-name Nodes) + nx_tree = DiGraph() + for i, node in enumerate(PreOrderIter(root)): # NOTE: DON'T CHANGE ITERATOR!, implementation here relies on the topological sorting property of pre-order traversal + node_ids[node] = i # mark node for parent identification later + if stop(node): + pruned[node] = True + continue # skip over node addition once marked + + parent_idx : Optional[int] = None + if node.parent is not None: + parent_idx = node_ids[node.parent] + if pruned[node.parent]: # if the parent node was pruned... + pruned[node] = True # ...mark the current node as also pruned... 
+ continue # ...and skip over its addition + + nx_tree.add_node(i, label=node.name, **get_node_attrs(node, attr_filter=attr_filter, include_name=False)) + if parent_idx is not None: + nx_tree.add_edge(parent_idx, i) # parent guaranteed to have been visited first and be mapped by pre-order topological sorting + pruned[node] = False # only once a node is added to the graph should it be explicitly marked as not pruned + + return nx_tree \ No newline at end of file From b5e93e149a37b57ad5c1c8edf3dfbed580debff0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:57:34 -0700 Subject: [PATCH 057/191] Wrote unit tests for node attribute fetching --- .../tests/genutils/treetools/test_trees.py | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/polymerist/tests/genutils/treetools/test_trees.py b/polymerist/tests/genutils/treetools/test_trees.py index fb7454e..57e6c7a 100644 --- a/polymerist/tests/genutils/treetools/test_trees.py +++ b/polymerist/tests/genutils/treetools/test_trees.py @@ -1,8 +1,15 @@ '''Unit tests for trees''' +from typing import Any +import pytest + from anytree.node import Node -def example_tree_for_tests() -> Node: # TODO: move to separate tests module eventually +from polymerist.genutils.treetools import treecopy + + +@pytest.fixture +def example_tree() -> Node: '''Produce a simplified tree for performing tests''' root = Node('f') b = Node('b', foo='bb', parent=root) @@ -15,4 +22,36 @@ def example_tree_for_tests() -> Node: # TODO: move to separate tests module even h = Node('h', foo='hh', parent=i) a_dup = Node('a', foo='a+a', parent=g) # testing how nodes with duplicate names are handled - return root \ No newline at end of file + return root + +@pytest.fixture +def node_name() -> str: + '''Allows for configuration of test name values (test should pass with arbitrary names)''' + return 'test' + +@pytest.fixture +def node_attrs() -> dict[str, Any]: + '''Allows for configuration of test attr values 
(test should pass with arbitrary attrs)''' + return { + 'foo' : 'bar', + 'baz' : 42, + 'spam' : 'eggs', + } + +def test_get_node_attrs_no_name(node_name : str, node_attrs : dict[str, Any]) -> None: + '''Test that correct Node attributes are fetched (excluding Node name)''' + node = Node(name=node_name, **node_attrs) + attrs = treecopy.get_node_attrs(node, include_name=False) + assert attrs == node_attrs + +def test_get_node_attrs_with_name(node_name : str, node_attrs : dict[str, Any]) -> None: + '''Test that correct Node attributes are fetched (including Node name)''' + node = Node(name=node_name, **node_attrs) + attrs = treecopy.get_node_attrs(node, include_name=True) + assert attrs == {'name' : node_name, **node_attrs} + +def test_get_node_attrs_attr_name_filter(node_name : str, node_attrs : dict[str, Any]) -> None: + '''Test that correct Node attributes are fetched (including Node name)''' + node = Node(name=node_name, **node_attrs) + attrs = treecopy.get_node_attrs(node, attr_filter=lambda attr_name : attr_name != 'foo', include_name=False) + assert 'foo' not in attrs \ No newline at end of file From 25a50a9a75431491f99d5f4c7dfdea223d71ca77 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 14:59:56 -0700 Subject: [PATCH 058/191] Renamed "treetools" to just "trees" --- polymerist/genutils/fileutils/filetree.py | 4 ++-- polymerist/genutils/importutils/pkgiter.py | 4 ++-- polymerist/genutils/{treetools => trees}/__init__.py | 0 polymerist/genutils/{treetools => trees}/treebase.py | 0 polymerist/genutils/{treetools => trees}/treecopy.py | 0 polymerist/genutils/{treetools => trees}/treeviz.py | 0 polymerist/tests/genutils/trees/__init__.py | 1 + polymerist/tests/genutils/{treetools => trees}/test_trees.py | 2 +- polymerist/tests/genutils/treetools/__init__.py | 1 - 9 files changed, 6 insertions(+), 6 deletions(-) rename polymerist/genutils/{treetools => trees}/__init__.py (100%) rename polymerist/genutils/{treetools => trees}/treebase.py (100%) 
rename polymerist/genutils/{treetools => trees}/treecopy.py (100%) rename polymerist/genutils/{treetools => trees}/treeviz.py (100%) create mode 100644 polymerist/tests/genutils/trees/__init__.py rename polymerist/tests/genutils/{treetools => trees}/test_trees.py (97%) delete mode 100644 polymerist/tests/genutils/treetools/__init__.py diff --git a/polymerist/genutils/fileutils/filetree.py b/polymerist/genutils/fileutils/filetree.py index eff2c2f..fa0de5c 100644 --- a/polymerist/genutils/fileutils/filetree.py +++ b/polymerist/genutils/fileutils/filetree.py @@ -5,8 +5,8 @@ from pathlib import Path from subprocess import Popen -from ..treetools.treebase import NodeCorrespondence, compile_tree_factory -from ..treetools.treeviz import treestr +from ..trees.treebase import NodeCorrespondence, compile_tree_factory +from ..trees.treeviz import treestr from ..decorators.functional import allow_string_paths diff --git a/polymerist/genutils/importutils/pkgiter.py b/polymerist/genutils/importutils/pkgiter.py index b3fc63a..55a3c3c 100644 --- a/polymerist/genutils/importutils/pkgiter.py +++ b/polymerist/genutils/importutils/pkgiter.py @@ -13,8 +13,8 @@ from anytree.render import AbstractStyle, ContStyle from anytree.iterators import PreOrderIter -from ..treetools.treebase import NodeCorrespondence, compile_tree_factory -from ..treetools.treeviz import treestr +from ..trees.treebase import NodeCorrespondence, compile_tree_factory +from ..trees.treeviz import treestr from .pkginspect import module_stem, is_package diff --git a/polymerist/genutils/treetools/__init__.py b/polymerist/genutils/trees/__init__.py similarity index 100% rename from polymerist/genutils/treetools/__init__.py rename to polymerist/genutils/trees/__init__.py diff --git a/polymerist/genutils/treetools/treebase.py b/polymerist/genutils/trees/treebase.py similarity index 100% rename from polymerist/genutils/treetools/treebase.py rename to polymerist/genutils/trees/treebase.py diff --git 
a/polymerist/genutils/treetools/treecopy.py b/polymerist/genutils/trees/treecopy.py similarity index 100% rename from polymerist/genutils/treetools/treecopy.py rename to polymerist/genutils/trees/treecopy.py diff --git a/polymerist/genutils/treetools/treeviz.py b/polymerist/genutils/trees/treeviz.py similarity index 100% rename from polymerist/genutils/treetools/treeviz.py rename to polymerist/genutils/trees/treeviz.py diff --git a/polymerist/tests/genutils/trees/__init__.py b/polymerist/tests/genutils/trees/__init__.py new file mode 100644 index 0000000..ff0aa15 --- /dev/null +++ b/polymerist/tests/genutils/trees/__init__.py @@ -0,0 +1 @@ +'''Unit tests for `trees` package''' \ No newline at end of file diff --git a/polymerist/tests/genutils/treetools/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py similarity index 97% rename from polymerist/tests/genutils/treetools/test_trees.py rename to polymerist/tests/genutils/trees/test_trees.py index 57e6c7a..dec2a2f 100644 --- a/polymerist/tests/genutils/treetools/test_trees.py +++ b/polymerist/tests/genutils/trees/test_trees.py @@ -5,7 +5,7 @@ from anytree.node import Node -from polymerist.genutils.treetools import treecopy +from polymerist.genutils.trees import treecopy @pytest.fixture diff --git a/polymerist/tests/genutils/treetools/__init__.py b/polymerist/tests/genutils/treetools/__init__.py deleted file mode 100644 index 55230fd..0000000 --- a/polymerist/tests/genutils/treetools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -'''Unit tests for `treetools` package''' \ No newline at end of file From ccc4cf5beaad706ed9db77e13097b97b66b95853 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 16:08:27 -0700 Subject: [PATCH 059/191] Wrote unit tests for node copying --- polymerist/tests/genutils/trees/test_trees.py | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/polymerist/tests/genutils/trees/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py 
index dec2a2f..ec69acc 100644 --- a/polymerist/tests/genutils/trees/test_trees.py +++ b/polymerist/tests/genutils/trees/test_trees.py @@ -4,26 +4,32 @@ import pytest from anytree.node import Node +from anytree.search import find as find_node -from polymerist.genutils.trees import treecopy +from polymerist.genutils.trees.treecopy import get_node_attrs, copy_node_unbound, copy_tree @pytest.fixture def example_tree() -> Node: '''Produce a simplified tree for performing tests''' root = Node('f') - b = Node('b', foo='bb', parent=root) - a = Node('a', foo='aa', parent=b) - d = Node('d', foo='dd', parent=b) - c = Node('c', foo='cc', parent=d) - e = Node('e', foo='ee', parent=d) - g = Node('g', foo='gg', parent=root) - i = Node('i', foo='ii', parent=g) - h = Node('h', foo='hh', parent=i) + b = Node('b', parent=root, foo='bb') + a = Node('a', parent=b, foo='aa', bar='bar') + d = Node('d', parent=b, foo='dd') + c = Node('c', parent=d, foo='cc', bar='bar') + e = Node('e', parent=d, foo='ee', baz=[1,2,3]) + g = Node('g', parent=root, foo='gg', bar='bar', baz=[1,2,3]) + i = Node('i', parent=g, foo='ii') + h = Node('h', parent=i, foo='hh') a_dup = Node('a', foo='a+a', parent=g) # testing how nodes with duplicate names are handled return root +@pytest.fixture +def g_node(example_tree) -> Node: + '''Returns a particularly attribute-rich and neighbor-rich node for testing''' + return find_node(example_tree, filter_=lambda node : node.name == 'g') + @pytest.fixture def node_name() -> str: '''Allows for configuration of test name values (test should pass with arbitrary names)''' @@ -41,17 +47,27 @@ def node_attrs() -> dict[str, Any]: def test_get_node_attrs_no_name(node_name : str, node_attrs : dict[str, Any]) -> None: '''Test that correct Node attributes are fetched (excluding Node name)''' node = Node(name=node_name, **node_attrs) - attrs = treecopy.get_node_attrs(node, include_name=False) + attrs = get_node_attrs(node, include_name=False) assert attrs == node_attrs def 
test_get_node_attrs_with_name(node_name : str, node_attrs : dict[str, Any]) -> None: '''Test that correct Node attributes are fetched (including Node name)''' node = Node(name=node_name, **node_attrs) - attrs = treecopy.get_node_attrs(node, include_name=True) + attrs = get_node_attrs(node, include_name=True) assert attrs == {'name' : node_name, **node_attrs} def test_get_node_attrs_attr_name_filter(node_name : str, node_attrs : dict[str, Any]) -> None: - '''Test that correct Node attributes are fetched (including Node name)''' + '''Test that filter conditions are correctly applied when fetching Node attributes''' node = Node(name=node_name, **node_attrs) - attrs = treecopy.get_node_attrs(node, attr_filter=lambda attr_name : attr_name != 'foo', include_name=False) - assert 'foo' not in attrs \ No newline at end of file + attrs = get_node_attrs(node, attr_filter=lambda attr_name : attr_name != 'foo', include_name=False) + assert 'foo' not in attrs + +def test_copy_node_unbound_values(g_node : Node) -> None: + '''Test that copy_node_unbound() correctly copies Node attributes''' + copy_node = copy_node_unbound(g_node) + assert get_node_attrs(copy_node) == get_node_attrs(g_node) + +def test_copy_node_unbound_relatives(g_node : Node) -> None: + '''Test that copy_node_unbound() correctly removes ancestors and children from node''' + copy_node = copy_node_unbound(g_node) + assert copy_node.parent is None and not copy_node.children \ No newline at end of file From fe817bcb81585a98d7419af9e2429c3cf5b4a0a6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 16:25:07 -0700 Subject: [PATCH 060/191] Wrote unit tests for tree copying --- polymerist/tests/genutils/trees/test_trees.py | 43 +++++++++++++++++-- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/polymerist/tests/genutils/trees/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py index ec69acc..b28f014 100644 --- a/polymerist/tests/genutils/trees/test_trees.py +++ 
b/polymerist/tests/genutils/trees/test_trees.py @@ -5,6 +5,7 @@ from anytree.node import Node from anytree.search import find as find_node +from anytree.iterators import AbstractIter, PreOrderIter, PostOrderIter, LevelOrderIter from polymerist.genutils.trees.treecopy import get_node_attrs, copy_node_unbound, copy_tree @@ -62,12 +63,46 @@ def test_get_node_attrs_attr_name_filter(node_name : str, node_attrs : dict[str, attrs = get_node_attrs(node, attr_filter=lambda attr_name : attr_name != 'foo', include_name=False) assert 'foo' not in attrs + def test_copy_node_unbound_values(g_node : Node) -> None: '''Test that copy_node_unbound() correctly copies Node attributes''' - copy_node = copy_node_unbound(g_node) - assert get_node_attrs(copy_node) == get_node_attrs(g_node) + copied_node = copy_node_unbound(g_node) + assert get_node_attrs(copied_node) == get_node_attrs(g_node) def test_copy_node_unbound_relatives(g_node : Node) -> None: '''Test that copy_node_unbound() correctly removes ancestors and children from node''' - copy_node = copy_node_unbound(g_node) - assert copy_node.parent is None and not copy_node.children \ No newline at end of file + copied_node = copy_node_unbound(g_node) + assert copied_node.parent is None and not copied_node.children + + +@pytest.mark.parametrize('iter_type', [PreOrderIter, PostOrderIter, LevelOrderIter]) +def test_copy_tree(example_tree : Node, iter_type : AbstractIter) -> None: + '''Test that trees structures are exactly copied with no filters''' + copied_tree = copy_tree(example_tree, stop=None, attr_filter=None) + assert all( + get_node_attrs(node_orig) == get_node_attrs(node_copied) + for node_orig, node_copied in zip(iter_type(example_tree), iter_type(copied_tree)) + ) + +@pytest.mark.parametrize('iter_type', [PreOrderIter, PostOrderIter, LevelOrderIter]) +def test_copy_tree_mod_attr(example_tree : Node, iter_type : AbstractIter) -> None: + '''Test that modifying attributes on copied trees does NOT affect original''' + copied_tree 
= copy_tree(example_tree, stop=None, attr_filter=None) + TARG_ATTR : str = 'foo' + for node in iter_type(copied_tree): # modify attributes + if hasattr(node, TARG_ATTR): + setattr(node, TARG_ATTR, getattr(node, TARG_ATTR) + '__') + + assert all( # attributes should NOT be equal, since they should've only been changed on the copy + getattr(node_orig, TARG_ATTR) != getattr(node_copied, TARG_ATTR) + for node_orig, node_copied in zip(iter_type(example_tree), iter_type(copied_tree)) + if hasattr(node_orig, TARG_ATTR) + ) + +def test_copy_tree_stop(example_tree : Node) -> None: + '''Test that stop conditions for tree copying are respected and targetted branches are pruned''' + copied_tree = copy_tree(example_tree, stop=lambda node : node.name == 'a', attr_filter=None) + assert all( + copied_node.name != 'a' + for copied_node in PreOrderIter(copied_tree) # in this case, the iteration order uniquely doesn't matter, only care that all nodes are traversed + ) \ No newline at end of file From cc4e41e7e7ca46979a6e41f21269d3de94ec071a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:09:44 -0700 Subject: [PATCH 061/191] Changed networkx mirror of Node name attribute to be called "name", instead of "label" --- polymerist/genutils/trees/treecopy.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/polymerist/genutils/trees/treecopy.py b/polymerist/genutils/trees/treecopy.py index 09f6e1d..a6e1918 100644 --- a/polymerist/genutils/trees/treecopy.py +++ b/polymerist/genutils/trees/treecopy.py @@ -164,7 +164,11 @@ def tree_to_networkx(root : Node, stop : Optional[Filter[Node]]=None, attr_filte pruned[node] = True # ...mark the current node as also pruned... 
continue # ...and skip over its addition - nx_tree.add_node(i, label=node.name, **get_node_attrs(node, attr_filter=attr_filter, include_name=False)) + nx_tree.add_node( + i, + name=node.name, # label=node.name # NOTE: attr "name" conflicts with pydot Node "name" init attribute; consider using "label" or similar for uniqueness? + **get_node_attrs(node, attr_filter=attr_filter, include_name=False) + ) if parent_idx is not None: nx_tree.add_edge(parent_idx, i) # parent guaranteed to have been visited first and be mapped by pre-order topological sorting pruned[node] = False # only once a node is added to the graph should it be explicitly marked as not pruned From aee97bcf2428285d6103b05ac29db15cf3888050 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:09:58 -0700 Subject: [PATCH 062/191] Wrote unit test for anytree-to-networkx conversion --- polymerist/tests/genutils/trees/test_trees.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/polymerist/tests/genutils/trees/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py index b28f014..fdfe98e 100644 --- a/polymerist/tests/genutils/trees/test_trees.py +++ b/polymerist/tests/genutils/trees/test_trees.py @@ -7,7 +7,9 @@ from anytree.search import find as find_node from anytree.iterators import AbstractIter, PreOrderIter, PostOrderIter, LevelOrderIter -from polymerist.genutils.trees.treecopy import get_node_attrs, copy_node_unbound, copy_tree +from networkx import dfs_preorder_nodes + +from polymerist.genutils.trees.treecopy import get_node_attrs, copy_node_unbound, copy_tree, tree_to_networkx @pytest.fixture @@ -15,11 +17,11 @@ def example_tree() -> Node: '''Produce a simplified tree for performing tests''' root = Node('f') b = Node('b', parent=root, foo='bb') - a = Node('a', parent=b, foo='aa', bar='bar') + a = Node('a', parent=b, foo='aa', bar='spam') d = Node('d', parent=b, foo='dd') - c = Node('c', parent=d, foo='cc', bar='bar') + c = Node('c', 
parent=d, foo='cc', bar='spam') e = Node('e', parent=d, foo='ee', baz=[1,2,3]) - g = Node('g', parent=root, foo='gg', bar='bar', baz=[1,2,3]) + g = Node('g', parent=root, foo='gg', bar='spam', baz=[1,2,3]) i = Node('i', parent=g, foo='ii') h = Node('h', parent=i, foo='hh') a_dup = Node('a', foo='a+a', parent=g) # testing how nodes with duplicate names are handled @@ -105,4 +107,13 @@ def test_copy_tree_stop(example_tree : Node) -> None: assert all( copied_node.name != 'a' for copied_node in PreOrderIter(copied_tree) # in this case, the iteration order uniquely doesn't matter, only care that all nodes are traversed + ) + + +def test_tree_to_networkx(example_tree : Node) -> None: + '''Test that conversion to networkx.DiGraph faithfully reproduces node order and attributes''' + nxtree = tree_to_networkx(example_tree) + assert all( + nxtree.nodes[i] == get_node_attrs(at_node, include_name=True) + for i, at_node in zip(dfs_preorder_nodes(nxtree), PreOrderIter(example_tree)) # preorder should match, regardless of implementation ) \ No newline at end of file From 6b878dad926b115dd115e6035481e7061b0f0600 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:54:26 -0700 Subject: [PATCH 063/191] Fixed premature return in submodule_loggers() --- polymerist/genutils/logutils/IOHandlers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/logutils/IOHandlers.py b/polymerist/genutils/logutils/IOHandlers.py index 1d83dd8..8fc9530 100644 --- a/polymerist/genutils/logutils/IOHandlers.py +++ b/polymerist/genutils/logutils/IOHandlers.py @@ -53,14 +53,14 @@ def submodule_loggers(module : ModuleType, recursive : bool=True, blacklist : Op logger_registry = {} for module in iter_submodules(module, recursive=recursive, blacklist=blacklist): full_module_name = module.__name__ - module_logger = logging.root.manager.loggerDict.get(full_module_name, None) - if isinstance(module, logging.PlaceHolder): # exclude dummy Placeholder 
loggers - module_logger = None + module_logger = get_logger_registry().get(full_module_name, None) + if isinstance(module, logging.PlaceHolder): + continue # exclude dummy Placeholder loggers - if not (sparse and module_logger is None): + if not (sparse and (module_logger is None)): logger_registry[full_module_name] = module_logger - return logger_registry + return logger_registry # FILE-STREAM HANDLING CLASSES class MultiStreamFileHandler(logging.FileHandler): From cb616181fae6cd71abf2eb1066a6ed56a9a2aed6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:55:10 -0700 Subject: [PATCH 064/191] Privatized pkgutils.iter_modules() to avoid confusion with iter_submodules() --- polymerist/genutils/importutils/pkgiter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/importutils/pkgiter.py b/polymerist/genutils/importutils/pkgiter.py index 55a3c3c..7e10b08 100644 --- a/polymerist/genutils/importutils/pkgiter.py +++ b/polymerist/genutils/importutils/pkgiter.py @@ -7,7 +7,7 @@ from typing import Generator, Iterable, Optional, Union from importlib import import_module -from pkgutil import iter_modules +from pkgutil import iter_modules as _iter_modules from anytree.node import Node from anytree.render import AbstractStyle, ContStyle @@ -29,7 +29,7 @@ def has_children(self, module : ModuleType) -> bool: return is_package(module) def children(self, module : ModuleType) -> Iterable[ModuleType]: - for _loader, module_name, ispkg in iter_modules(module.__path__, prefix=module.__name__+'.'): + for _loader, module_name, ispkg in _iter_modules(module.__path__, prefix=module.__name__+'.'): try: submodule = import_module(module_name) yield submodule From d93465d13112f6da60e8ceb138e24f3e46f640eb Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:55:38 -0700 Subject: [PATCH 065/191] Reimplemented extract_imports_from_module() with more robust package and module checks from pkginspect --- 
polymerist/genutils/importutils/pyimports.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/importutils/pyimports.py b/polymerist/genutils/importutils/pyimports.py index dc3b05e..4b1789c 100644 --- a/polymerist/genutils/importutils/pyimports.py +++ b/polymerist/genutils/importutils/pyimports.py @@ -8,6 +8,7 @@ from pathlib import Path from ..decorators.functional import allow_string_paths # TODO: see if there's anyway to bypass a relative import here +from .pkginspect import is_module, is_package @@ -69,9 +70,7 @@ def extract_imports_from_dir(source_dir : Path) -> list[ImportedObjectInfo]: def extract_imports_from_module(module : ModuleType) -> list[ImportedObjectInfo]: '''Compiles info from all Python imports in a Python (.py) file''' - # TODO: find more reliable/canonical way to tell packages and bare modules/scripts apart - if hasattr(module, '__file__') and getattr(module, '__file__') is not None: - return extract_imports_from_pyfile(module.__file__) - - if hasattr(module, '__path__') and getattr(module, '__path__') is not None: - return extract_imports_from_dir(module.__path__[0]) \ No newline at end of file + if is_package(module): + return extract_imports_from_dir(module.__path__[0]) # TODO: provide package-specific, non-recursive implementation of this + else: # all packages are modules, but not all modules are packages; hence, the check must be done in this order + return extract_imports_from_pyfile(module.__file__) \ No newline at end of file From 2ba0996b020591545bdaccaa5522bda48f920dbc Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 17:56:05 -0700 Subject: [PATCH 066/191] Exposed submodule classes and functions at the subpackage levels --- polymerist/genutils/importutils/__init__.py | 2 +- polymerist/genutils/trees/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/importutils/__init__.py
b/polymerist/genutils/importutils/__init__.py index 1486139..4a3662c 100644 --- a/polymerist/genutils/importutils/__init__.py +++ b/polymerist/genutils/importutils/__init__.py @@ -1,4 +1,4 @@ '''Functionality for dynamically importing and inspecting Python modules and packages''' -from .pkgiter import module_hierarchy, iter_submodules +from .pkgiter import module_hierarchy, iter_submodules, module_tree, module_tree_direct from .pkginspect import is_package, is_module \ No newline at end of file diff --git a/polymerist/genutils/trees/__init__.py b/polymerist/genutils/trees/__init__.py index c93e9b7..6e3bfe3 100644 --- a/polymerist/genutils/trees/__init__.py +++ b/polymerist/genutils/trees/__init__.py @@ -1,5 +1,5 @@ '''Generic functionality for tree-like data structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' from .treebase import NodeCorrespondence, compile_tree_factory -from .treecopy import copy_tree +from .treecopy import get_node_attrs, copy_tree, tree_to_networkx from .treeviz import treestr \ No newline at end of file From a7bc177e3ec99f817b19ee361fb44e947232c6c6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 18:01:43 -0700 Subject: [PATCH 067/191] Added "dev" branch to auto-test action on commit + PR --- .github/workflows/CI.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/CI.yaml b/.github/workflows/CI.yaml index 7633ed0..213121a 100644 --- a/.github/workflows/CI.yaml +++ b/.github/workflows/CI.yaml @@ -6,9 +6,11 @@ on: push: branches: - "main" + - "dev" pull_request: branches: - "main" + - "dev" schedule: # Weekly tests run on main by default: # Scheduled workflows run on the latest commit on the default or base branch.
From 6ec9f06d62f594ebe111458ed98ff87143c1f64f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 18:04:37 -0700 Subject: [PATCH 068/191] Baked importutils directly into toplevel module --- polymerist/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/polymerist/__init__.py b/polymerist/__init__.py index e856bb9..7dd128d 100644 --- a/polymerist/__init__.py +++ b/polymerist/__init__.py @@ -2,6 +2,7 @@ # Add imports here from ._version import __version__ +from .genutils import importutils # from .genutils.importutils import register_submodules, module_by_pkg_str # _MODULE_SELF = module_by_pkg_str(__package__) # keep reference to own module From 2220afc43fc5ca3acdd8e78574487debe3729e28 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 19:49:45 -0700 Subject: [PATCH 069/191] Consolidated functgroups.records with _daylight_scrape --- .../smileslib/functgroups/_daylight_scrape.py | 17 ++++++++++++++++- polymerist/smileslib/functgroups/records.py | 13 ------------- 2 files changed, 16 insertions(+), 14 deletions(-) delete mode 100644 polymerist/smileslib/functgroups/records.py diff --git a/polymerist/smileslib/functgroups/_daylight_scrape.py b/polymerist/smileslib/functgroups/_daylight_scrape.py index fb33f91..5ea9369 100644 --- a/polymerist/smileslib/functgroups/_daylight_scrape.py +++ b/polymerist/smileslib/functgroups/_daylight_scrape.py @@ -1,9 +1,24 @@ +'''Backend web-scraping to (re)build SMARTS lookup table from the Daylight SMARTS official site''' + +from dataclasses import dataclass + import requests from bs4 import BeautifulSoup + import pandas as pd -from .records import FnGroupSMARTSEntry +@dataclass(frozen=True) +class FnGroupSMARTSEntry: + '''For encapsulating SMARTS group info from Daylight SMARTS registry''' + category : str + category_desc : str + + group_type : str + group_name : str + + SMARTS : str + SMARTS_desc : str DAYLIGHT_URL =
'https://www.daylight.com/dayhtml_tutorials/languages/smarts/smarts_examples.html' diff --git a/polymerist/smileslib/functgroups/records.py b/polymerist/smileslib/functgroups/records.py deleted file mode 100644 index c2b451c..0000000 --- a/polymerist/smileslib/functgroups/records.py +++ /dev/null @@ -1,13 +0,0 @@ -from dataclasses import dataclass - -@dataclass(frozen=True) -class FnGroupSMARTSEntry: - '''For encapuslating SMARTS group info from Daylight SMARTS registry''' - category : str - category_desc : str - - group_type : str - group_name : str - - SMARTS : str - SMARTS_desc : str From 0c4c0c32abac138a00d8d78f9f612eb33ad281a2 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 19:50:03 -0700 Subject: [PATCH 070/191] Inserted missing docstrings --- polymerist/genutils/logutils/__init__.py | 1 + polymerist/genutils/sequences/discernment/__init__.py | 3 ++- polymerist/smileslib/functgroups/__init__.py | 5 ++--- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/polymerist/genutils/logutils/__init__.py b/polymerist/genutils/logutils/__init__.py index e69de29..9c22d8d 100644 --- a/polymerist/genutils/logutils/__init__.py +++ b/polymerist/genutils/logutils/__init__.py @@ -0,0 +1 @@ +'''Utilities for tracking, wrapping, and redirecting logging output''' \ No newline at end of file diff --git a/polymerist/genutils/sequences/discernment/__init__.py b/polymerist/genutils/sequences/discernment/__init__.py index 3c058c3..1175acb 100644 --- a/polymerist/genutils/sequences/discernment/__init__.py +++ b/polymerist/genutils/sequences/discernment/__init__.py @@ -1,4 +1,5 @@ -'''Tools for solving the DISCERNMENT (Determination of Index Sequences from Complete Enumeration of Ransom Notes - Multiset Extension with Nonlexical Types) problem +''' +Tools for solving the DISCERNMENT (Determination of Index Sequences from Complete Enumeration of Ransom Notes - Multiset Extension with Nonlexical Types) problem DISCERNMENT problem definition: Given a "word" 
(a sequence of N symbols of type T), and a mapped sequence of "bins" (ordered collection of multisets of type T, each assigned a label of type L), diff --git a/polymerist/smileslib/functgroups/__init__.py b/polymerist/smileslib/functgroups/__init__.py index 9a7fa68..3c02dc7 100644 --- a/polymerist/smileslib/functgroups/__init__.py +++ b/polymerist/smileslib/functgroups/__init__.py @@ -8,11 +8,10 @@ from pathlib import Path _MODULE_PATH = Path(__file__).parent -# load/generating functional group smarts table -from . import _daylight_scrape -from .records import FnGroupSMARTSEntry import pandas as pd +from ._daylight_scrape import scrape_SMARTS, FnGroupSMARTSEntry + _fgtab_name : str = 'fn_group_smarts' _fgtab_path = _MODULE_PATH / f'{_fgtab_name}.csv' From eedb22fa8e1f2276a8c7d3fab48d938965289755 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 18 Nov 2024 22:06:05 -0700 Subject: [PATCH 071/191] Fixed docstring typo --- polymerist/unitutils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/unitutils/__init__.py b/polymerist/unitutils/__init__.py index e2a6907..831307c 100644 --- a/polymerist/unitutils/__init__.py +++ b/polymerist/unitutils/__init__.py @@ -1 +1 @@ -'''Physical constants, dimensioanl analysis, and unit conversion utilities''' \ No newline at end of file +'''Physical constants, dimensional analysis, and unit conversion utilities''' \ No newline at end of file From 1b7ef7d281da5462deacecc20c6f5fa1001b6b5b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 19 Nov 2024 15:17:07 -0700 Subject: [PATCH 072/191] Updated module-level docstring and author tags --- polymerist/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/polymerist/__init__.py b/polymerist/__init__.py index 7dd128d..ab54ffc 100644 --- a/polymerist/__init__.py +++ b/polymerist/__init__.py @@ -1,4 +1,7 @@ -"""A unified set of tools for setting up general organic polymer systems for MD within the OpenFF 
framework""" +"""A unified set of tools for setting up general organic polymer systems for molecular dynamics""" + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' # Add imports here from ._version import __version__ From f8541eb3bc24f8f4726255b300a041440cae4c2e Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 19 Nov 2024 16:15:56 -0700 Subject: [PATCH 073/191] Inserted __author__ and __email__ tags throughout all modules --- polymerist/analysis/__init__.py | 5 ++++- polymerist/analysis/calculation.py | 3 +++ polymerist/analysis/mdtrajutils.py | 3 +++ polymerist/data/__init__.py | 5 ++++- polymerist/genutils/__init__.py | 5 ++++- polymerist/genutils/attrs.py | 3 +++ polymerist/genutils/bits.py | 3 +++ polymerist/genutils/containers.py | 3 +++ polymerist/genutils/decorators/__init__.py | 5 ++++- polymerist/genutils/decorators/classmod.py | 3 +++ polymerist/genutils/decorators/functional.py | 3 +++ polymerist/genutils/decorators/meta.py | 3 +++ polymerist/genutils/decorators/signatures.py | 3 +++ polymerist/genutils/duration.py | 3 +++ polymerist/genutils/fileutils/__init__.py | 5 ++++- polymerist/genutils/fileutils/extensions.py | 3 +++ polymerist/genutils/fileutils/filetree.py | 3 +++ polymerist/genutils/fileutils/jsonio/__init__.py | 3 +++ polymerist/genutils/fileutils/jsonio/jsonify.py | 3 +++ polymerist/genutils/fileutils/jsonio/serialize.py | 3 +++ polymerist/genutils/fileutils/jsonio/update.py | 3 +++ polymerist/genutils/fileutils/pathutils.py | 3 +++ polymerist/genutils/filters.py | 3 +++ polymerist/genutils/importutils/__init__.py | 3 +++ polymerist/genutils/importutils/pkginspect.py | 3 +++ polymerist/genutils/importutils/pkgiter.py | 3 +++ polymerist/genutils/importutils/pyimports.py | 3 +++ polymerist/genutils/iteration.py | 3 +++ polymerist/genutils/logutils/IOHandlers.py | 3 +++ polymerist/genutils/logutils/__init__.py | 5 ++++- polymerist/genutils/logutils/timestamps.py | 3 +++ polymerist/genutils/sequences/__init__.py 
| 3 +++ polymerist/genutils/sequences/discernment/__init__.py | 4 ++++ polymerist/genutils/sequences/discernment/enumeration.py | 3 +++ polymerist/genutils/sequences/discernment/inventory.py | 3 +++ polymerist/genutils/sequences/discernment/strategies.py | 3 +++ polymerist/genutils/sequences/seqops.py | 3 +++ polymerist/genutils/sequences/similarity/__init__.py | 5 ++++- polymerist/genutils/sequences/similarity/distances.py | 3 +++ polymerist/genutils/sequences/similarity/edits.py | 3 +++ polymerist/genutils/textual/__init__.py | 3 +++ polymerist/genutils/textual/casing.py | 3 +++ polymerist/genutils/textual/delimiters.py | 3 +++ polymerist/genutils/textual/encoding.py | 3 +++ polymerist/genutils/textual/interpolation.py | 3 +++ polymerist/genutils/textual/ordinals.py | 3 +++ polymerist/genutils/textual/prettyprint.py | 3 +++ polymerist/genutils/textual/strsearch.py | 3 +++ polymerist/genutils/trees/__init__.py | 3 +++ polymerist/genutils/trees/treebase.py | 3 +++ polymerist/genutils/trees/treecopy.py | 3 +++ polymerist/genutils/trees/treeviz.py | 3 +++ polymerist/genutils/typetools/__init__.py | 5 ++++- polymerist/genutils/typetools/categorical.py | 3 +++ polymerist/genutils/typetools/numpytypes.py | 3 +++ polymerist/genutils/typetools/parametric.py | 3 +++ polymerist/graphics/__init__.py | 5 ++++- polymerist/graphics/colors.py | 3 +++ polymerist/graphics/imageutils.py | 3 +++ polymerist/graphics/named_colors.py | 3 +++ polymerist/graphics/plotutils.py | 3 +++ polymerist/maths/__init__.py | 5 ++++- polymerist/maths/combinatorics/__init__.py | 5 ++++- polymerist/maths/combinatorics/numbers.py | 3 +++ polymerist/maths/combinatorics/partitions.py | 3 +++ polymerist/maths/combinatorics/permutations.py | 3 +++ polymerist/maths/fractions/__init__.py | 5 ++++- polymerist/maths/fractions/continued.py | 3 +++ polymerist/maths/fractions/ratios.py | 3 +++ polymerist/maths/greek/__init__.py | 3 +++ polymerist/maths/greek/alphabet.py | 3 +++ polymerist/maths/greek/prefixes.py 
| 3 +++ polymerist/maths/lattices/__init__.py | 5 ++++- polymerist/maths/lattices/bravais.py | 3 +++ polymerist/maths/lattices/coordinates.py | 3 +++ polymerist/maths/lattices/integral.py | 3 +++ polymerist/maths/linearalg/__init__.py | 5 ++++- polymerist/maths/linearalg/affine.py | 3 +++ polymerist/maths/linearalg/decomposition.py | 3 +++ polymerist/maths/numbersys/__init__.py | 3 +++ polymerist/maths/numbersys/commonbases.py | 3 +++ polymerist/maths/numbersys/positional.py | 3 +++ polymerist/maths/primes.py | 3 +++ polymerist/maths/statistics.py | 3 +++ polymerist/mdtools/__init__.py | 5 ++++- polymerist/mdtools/lammpstools/__init__.py | 5 ++++- polymerist/mdtools/lammpstools/lammpseval.py | 3 +++ polymerist/mdtools/lammpstools/unitstyles.py | 3 +++ polymerist/mdtools/openfftools/__init__.py | 3 +++ polymerist/mdtools/openfftools/boxvectors.py | 3 +++ polymerist/mdtools/openfftools/omminter.py | 3 +++ polymerist/mdtools/openfftools/partialcharge/__init__.py | 5 ++++- .../mdtools/openfftools/partialcharge/chargemethods.py | 3 +++ polymerist/mdtools/openfftools/partialcharge/molchargers.py | 3 +++ .../mdtools/openfftools/partialcharge/rescharge/__init__.py | 5 ++++- .../openfftools/partialcharge/rescharge/calculation.py | 3 +++ .../mdtools/openfftools/partialcharge/rescharge/interface.py | 3 +++ .../mdtools/openfftools/partialcharge/rescharge/rctypes.py | 3 +++ .../openfftools/partialcharge/rescharge/redistribution.py | 3 +++ polymerist/mdtools/openfftools/partition.py | 1 + polymerist/mdtools/openfftools/solvation/__init__.py | 5 ++++- polymerist/mdtools/openfftools/solvation/packing.py | 3 +++ polymerist/mdtools/openfftools/solvation/physprops.py | 3 +++ .../mdtools/openfftools/solvation/solvents/__init__.py | 3 +++ polymerist/mdtools/openfftools/topology.py | 3 +++ polymerist/mdtools/openmmtools/__init__.py | 5 ++++- polymerist/mdtools/openmmtools/description.py | 3 +++ polymerist/mdtools/openmmtools/evaluation.py | 3 +++ 
polymerist/mdtools/openmmtools/execution.py | 3 +++ polymerist/mdtools/openmmtools/forcegroups.py | 3 +++ polymerist/mdtools/openmmtools/parameters.py | 3 +++ polymerist/mdtools/openmmtools/preparation.py | 3 +++ polymerist/mdtools/openmmtools/reporters.py | 3 +++ polymerist/mdtools/openmmtools/serialization.py | 3 +++ polymerist/mdtools/openmmtools/thermo.py | 3 +++ polymerist/polymerist.py | 2 ++ polymerist/polymers/__init__.py | 5 ++++- polymerist/polymers/building.py | 3 +++ polymerist/polymers/estimation.py | 3 +++ polymerist/polymers/exceptions.py | 3 +++ polymerist/polymers/monographs.py | 3 +++ polymerist/polymers/monomers/__init__.py | 3 +++ polymerist/polymers/monomers/repr.py | 3 +++ polymerist/polymers/monomers/specification.py | 3 +++ polymerist/polymers/smidgelib/__init__.py | 3 +++ polymerist/polymers/smidgelib/smidgebonds.py | 3 +++ polymerist/polymers/smidgelib/smidgeread.py | 3 +++ polymerist/polymers/smidgelib/smidgewrite.py | 3 +++ polymerist/rdutils/__init__.py | 5 ++++- polymerist/rdutils/bonding/__init__.py | 3 +++ polymerist/rdutils/bonding/_bonding.py | 3 +++ polymerist/rdutils/bonding/dissolution.py | 3 +++ polymerist/rdutils/bonding/formation.py | 3 +++ polymerist/rdutils/bonding/identification.py | 3 +++ polymerist/rdutils/bonding/permutation.py | 3 +++ polymerist/rdutils/bonding/portlib.py | 3 +++ polymerist/rdutils/bonding/substitution.py | 3 +++ polymerist/rdutils/labeling/__init__.py | 5 ++++- polymerist/rdutils/labeling/atomwise.py | 3 +++ polymerist/rdutils/labeling/bijection.py | 3 +++ polymerist/rdutils/labeling/bondwise.py | 3 +++ polymerist/rdutils/labeling/molwise.py | 3 +++ polymerist/rdutils/rdconvert.py | 3 +++ polymerist/rdutils/rdcoords/__init__.py | 5 ++++- polymerist/rdutils/rdcoords/tiling.py | 3 +++ polymerist/rdutils/rderrors.py | 5 ++++- polymerist/rdutils/rdgraphs.py | 3 +++ polymerist/rdutils/rdkdraw.py | 3 +++ polymerist/rdutils/rdprops.py | 3 +++ polymerist/rdutils/reactions/__init__.py | 5 ++++- 
polymerist/rdutils/reactions/assembly.py | 3 +++ polymerist/rdutils/reactions/fragment.py | 3 +++ polymerist/rdutils/reactions/reactexc.py | 3 +++ polymerist/rdutils/reactions/reactions.py | 3 +++ polymerist/rdutils/reactions/reactors.py | 3 +++ polymerist/smileslib/__init__.py | 3 +++ polymerist/smileslib/chemdbqueries.py | 3 +++ polymerist/smileslib/functgroups/__init__.py | 3 +++ polymerist/smileslib/functgroups/_daylight_scrape.py | 3 +++ polymerist/smileslib/primitives.py | 3 +++ polymerist/smileslib/substructures.py | 3 +++ polymerist/tests/__init__.py | 3 +++ polymerist/tests/analysis/__init__.py | 5 ++++- polymerist/tests/data/__init__.py | 5 ++++- polymerist/tests/genutils/__init__.py | 5 ++++- polymerist/tests/genutils/decorators/__init__.py | 5 ++++- polymerist/tests/genutils/fileutils/__init__.py | 5 ++++- polymerist/tests/genutils/fileutils/jsonio/__init__.py | 5 ++++- polymerist/tests/genutils/fileutils/test_filetree.py | 3 +++ polymerist/tests/genutils/logutils/__init__.py | 5 ++++- polymerist/tests/genutils/sequences/__init__.py | 5 ++++- polymerist/tests/genutils/sequences/discernment/__init__.py | 5 ++++- .../tests/genutils/sequences/discernment/test_discernment.py | 3 +++ polymerist/tests/genutils/sequences/similarity/__init__.py | 5 ++++- polymerist/tests/genutils/test_attrs.py | 3 +++ polymerist/tests/genutils/test_pkginspect.py | 3 +++ polymerist/tests/genutils/textual/__init__.py | 5 ++++- polymerist/tests/genutils/trees/__init__.py | 5 ++++- polymerist/tests/genutils/trees/test_trees.py | 3 +++ polymerist/tests/genutils/typetools/__init__.py | 5 ++++- polymerist/tests/graphics/__init__.py | 5 ++++- polymerist/tests/maths/__init__.py | 5 ++++- polymerist/tests/maths/combinatorics/__init__.py | 5 ++++- polymerist/tests/maths/fractions/__init__.py | 5 ++++- polymerist/tests/maths/greek/__init__.py | 5 ++++- polymerist/tests/maths/lattices/__init__.py | 5 ++++- polymerist/tests/maths/linearalg/__init__.py | 5 ++++- 
polymerist/tests/maths/numbersys/__init__.py | 5 ++++- polymerist/tests/mdtools/__init__.py | 5 ++++- polymerist/tests/mdtools/lammpstools/__init__.py | 5 ++++- polymerist/tests/mdtools/openfftools/__init__.py | 5 ++++- .../tests/mdtools/openfftools/partialcharge/__init__.py | 5 ++++- .../mdtools/openfftools/partialcharge/rescharge/__init__.py | 5 ++++- polymerist/tests/mdtools/openfftools/solvation/__init__.py | 5 ++++- .../tests/mdtools/openfftools/solvation/solvents/__init__.py | 5 ++++- polymerist/tests/mdtools/openmmtools/__init__.py | 5 ++++- polymerist/tests/polymers/__init__.py | 5 ++++- polymerist/tests/polymers/monomers/__init__.py | 5 ++++- polymerist/tests/polymers/smidgelib/__init__.py | 5 ++++- polymerist/tests/rdutils/__init__.py | 5 ++++- polymerist/tests/rdutils/bonding/__init__.py | 5 ++++- polymerist/tests/rdutils/labeling/__init__.py | 5 ++++- polymerist/tests/rdutils/rdcoords/__init__.py | 5 ++++- polymerist/tests/rdutils/reactions/__init__.py | 5 ++++- polymerist/tests/smileslib/__init__.py | 5 ++++- polymerist/tests/smileslib/functgroups/__init__.py | 5 ++++- polymerist/tests/test_polymerist.py | 3 +++ polymerist/tests/unitutils/__init__.py | 5 ++++- polymerist/unitutils/__init__.py | 5 ++++- polymerist/unitutils/dimensions.py | 3 +++ polymerist/unitutils/extraunits.py | 3 +++ polymerist/unitutils/interop.py | 3 +++ polymerist/unitutils/physconsts.py | 3 +++ polymerist/unitutils/unitstr.py | 3 +++ 214 files changed, 707 insertions(+), 67 deletions(-) diff --git a/polymerist/analysis/__init__.py b/polymerist/analysis/__init__.py index e1d4622..bfb2245 100644 --- a/polymerist/analysis/__init__.py +++ b/polymerist/analysis/__init__.py @@ -1 +1,4 @@ -'''Utilities for calculating properties from MD configurations and trajectories''' \ No newline at end of file +'''Utilities for calculating properties from MD configurations and trajectories''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git 
a/polymerist/analysis/calculation.py b/polymerist/analysis/calculation.py index 89aada9..458f9db 100644 --- a/polymerist/analysis/calculation.py +++ b/polymerist/analysis/calculation.py @@ -1,5 +1,8 @@ '''Direct implementations of property calculations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np from ..maths.linearalg.decomposition import diagonalize diff --git a/polymerist/analysis/mdtrajutils.py b/polymerist/analysis/mdtrajutils.py index bfdbbae..681ceab 100644 --- a/polymerist/analysis/mdtrajutils.py +++ b/polymerist/analysis/mdtrajutils.py @@ -1,5 +1,8 @@ '''Thin wrappers around mdtraj-implemented property calculations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, Iterable, Optional, TypeAlias, Union from dataclasses import dataclass, field diff --git a/polymerist/data/__init__.py b/polymerist/data/__init__.py index 309d669..2467e5c 100644 --- a/polymerist/data/__init__.py +++ b/polymerist/data/__init__.py @@ -1 +1,4 @@ -'''Additional data shipped along with polymerist source code''' \ No newline at end of file +'''Additional data shipped along with polymerist source code''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/__init__.py b/polymerist/genutils/__init__.py index 52fa541..800747a 100644 --- a/polymerist/genutils/__init__.py +++ b/polymerist/genutils/__init__.py @@ -1 +1,4 @@ -'''General-purpose utilities constructed only with Python builtins + numpy''' \ No newline at end of file +'''General-purpose utilities constructed only with Python builtins + numpy''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/attrs.py b/polymerist/genutils/attrs.py index f94a9dd..09c61ed 100644 --- a/polymerist/genutils/attrs.py +++ b/polymerist/genutils/attrs.py @@ -1,5 +1,8 @@ '''For dynamically inspecting and modifying attributes 
of Python objects''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Optional, Union import re diff --git a/polymerist/genutils/bits.py b/polymerist/genutils/bits.py index 2631b5e..aa8a828 100644 --- a/polymerist/genutils/bits.py +++ b/polymerist/genutils/bits.py @@ -1,5 +1,8 @@ '''For bitwise operations and conversions to/from bitstrings''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union diff --git a/polymerist/genutils/containers.py b/polymerist/genutils/containers.py index bdbca36..f92c987 100644 --- a/polymerist/genutils/containers.py +++ b/polymerist/genutils/containers.py @@ -1,5 +1,8 @@ '''Custom data containers with useful properties''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Iterable, TypeVar T = TypeVar('T') # generic type variable diff --git a/polymerist/genutils/decorators/__init__.py b/polymerist/genutils/decorators/__init__.py index 3bd0e38..7ad9a34 100644 --- a/polymerist/genutils/decorators/__init__.py +++ b/polymerist/genutils/decorators/__init__.py @@ -1 +1,4 @@ -'''Decorators for modifying functions and classes. Supply useful behaviors and/or eliminate boilerplate''' \ No newline at end of file +'''Decorators for modifying functions and classes. 
Supply useful behaviors and/or eliminate boilerplate''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/decorators/classmod.py b/polymerist/genutils/decorators/classmod.py index 592f469..814acbf 100644 --- a/polymerist/genutils/decorators/classmod.py +++ b/polymerist/genutils/decorators/classmod.py @@ -1,5 +1,8 @@ '''Decorators for modifying classes''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Iterable, Optional, TypeVar, Union C = TypeVar('C') diff --git a/polymerist/genutils/decorators/functional.py b/polymerist/genutils/decorators/functional.py index 51fa881..5504c93 100644 --- a/polymerist/genutils/decorators/functional.py +++ b/polymerist/genutils/decorators/functional.py @@ -1,5 +1,8 @@ '''Decorators for modifying functions''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Iterable, Optional, Type, Union from inspect import signature, Parameter diff --git a/polymerist/genutils/decorators/meta.py b/polymerist/genutils/decorators/meta.py index 367185d..082dfd6 100644 --- a/polymerist/genutils/decorators/meta.py +++ b/polymerist/genutils/decorators/meta.py @@ -1,5 +1,8 @@ '''Decorators for modifying other decorators''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Concatenate, Callable, ParamSpec, TypeAlias, TypeVar from functools import update_wrapper, wraps diff --git a/polymerist/genutils/decorators/signatures.py b/polymerist/genutils/decorators/signatures.py index bccc639..26076e3 100644 --- a/polymerist/genutils/decorators/signatures.py +++ b/polymerist/genutils/decorators/signatures.py @@ -1,5 +1,8 @@ '''Tools for simplifying transfer and modification of wrapped function type signatures''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from inspect import Parameter, Signature 
POSITIONAL_PARAMETER_TYPES = [ diff --git a/polymerist/genutils/duration.py b/polymerist/genutils/duration.py index 82d1ea5..9227d20 100644 --- a/polymerist/genutils/duration.py +++ b/polymerist/genutils/duration.py @@ -1,5 +1,8 @@ '''Utilities for representing, converting, and formatting amounts of time''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar, Union, TypeAlias from dataclasses import dataclass, field diff --git a/polymerist/genutils/fileutils/__init__.py b/polymerist/genutils/fileutils/__init__.py index 00c1f7d..78b7179 100644 --- a/polymerist/genutils/fileutils/__init__.py +++ b/polymerist/genutils/fileutils/__init__.py @@ -1 +1,4 @@ -'''Utilities for manipulating Path-like objects and interfacing with directories and files''' \ No newline at end of file +'''Utilities for manipulating Path-like objects and interfacing with directories and files''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/fileutils/extensions.py b/polymerist/genutils/fileutils/extensions.py index 9c14f86..8c19b42 100644 --- a/polymerist/genutils/fileutils/extensions.py +++ b/polymerist/genutils/fileutils/extensions.py @@ -1,5 +1,8 @@ '''Utilities for categorizing and representing file extensions/suffixes''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import mimetypes from collections import defaultdict diff --git a/polymerist/genutils/fileutils/filetree.py b/polymerist/genutils/fileutils/filetree.py index fa0de5c..1a3ae3e 100644 --- a/polymerist/genutils/fileutils/filetree.py +++ b/polymerist/genutils/fileutils/filetree.py @@ -1,5 +1,8 @@ '''Tools for manipulating files and directories in the file system''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Iterable from pathlib import Path diff --git a/polymerist/genutils/fileutils/jsonio/__init__.py 
b/polymerist/genutils/fileutils/jsonio/__init__.py index 66cec66..8d69303 100644 --- a/polymerist/genutils/fileutils/jsonio/__init__.py +++ b/polymerist/genutils/fileutils/jsonio/__init__.py @@ -1 +1,4 @@ '''Utilities for extending the Python-JSON interface''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/fileutils/jsonio/jsonify.py b/polymerist/genutils/fileutils/jsonio/jsonify.py index f5f5ae3..dab42a1 100644 --- a/polymerist/genutils/fileutils/jsonio/jsonify.py +++ b/polymerist/genutils/fileutils/jsonio/jsonify.py @@ -1,5 +1,8 @@ '''Tools for making existing classes easily readable/writable to JSON''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, ClassVar, Optional, Type, TypeVar, Union C = TypeVar('C') # generic type for classes diff --git a/polymerist/genutils/fileutils/jsonio/serialize.py b/polymerist/genutils/fileutils/jsonio/serialize.py index 76bd3f0..efc01fb 100644 --- a/polymerist/genutils/fileutils/jsonio/serialize.py +++ b/polymerist/genutils/fileutils/jsonio/serialize.py @@ -1,5 +1,8 @@ '''Interfaces for extending what types of objects can be serialized to JSON''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, ClassVar, Optional, Type, TypeVar, Union from abc import ABC, abstractstaticmethod from inspect import isclass diff --git a/polymerist/genutils/fileutils/jsonio/update.py b/polymerist/genutils/fileutils/jsonio/update.py index ce1aeb0..841ea33 100644 --- a/polymerist/genutils/fileutils/jsonio/update.py +++ b/polymerist/genutils/fileutils/jsonio/update.py @@ -1,5 +1,8 @@ '''Tools for statically or dynamically updating JSON files''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import json from pathlib import Path diff --git a/polymerist/genutils/fileutils/pathutils.py b/polymerist/genutils/fileutils/pathutils.py index e77c756..b365a9e 100644 
--- a/polymerist/genutils/fileutils/pathutils.py +++ b/polymerist/genutils/fileutils/pathutils.py @@ -1,5 +1,8 @@ '''Utilities for editing, augmenting, and querying Paths''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union from pathlib import Path diff --git a/polymerist/genutils/filters.py b/polymerist/genutils/filters.py index 18ceeed..0c6ccaf 100644 --- a/polymerist/genutils/filters.py +++ b/polymerist/genutils/filters.py @@ -1,5 +1,8 @@ '''Typehinting and generic implementations of filter (indicator) functions''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, TypeVar T = TypeVar('T') diff --git a/polymerist/genutils/importutils/__init__.py b/polymerist/genutils/importutils/__init__.py index 4a3662c..1e768b2 100644 --- a/polymerist/genutils/importutils/__init__.py +++ b/polymerist/genutils/importutils/__init__.py @@ -1,4 +1,7 @@ '''Functionality for dynamically importing and inspecting Python modules and packages''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .pkgiter import module_hierarchy, iter_submodules, module_tree, module_tree_direct from .pkginspect import is_package, is_module \ No newline at end of file diff --git a/polymerist/genutils/importutils/pkginspect.py b/polymerist/genutils/importutils/pkginspect.py index f05bff8..34fd72e 100644 --- a/polymerist/genutils/importutils/pkginspect.py +++ b/polymerist/genutils/importutils/pkginspect.py @@ -1,5 +1,8 @@ '''For checking whether object are valid Python modules and packages, and if so for gathering info from within them''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional, Union from types import ModuleType from pathlib import Path diff --git a/polymerist/genutils/importutils/pkgiter.py b/polymerist/genutils/importutils/pkgiter.py index 7e10b08..f7231e1 100644 --- 
a/polymerist/genutils/importutils/pkgiter.py +++ b/polymerist/genutils/importutils/pkgiter.py @@ -1,5 +1,8 @@ '''Tools for iterating over and extracting information from Python package hierarchies''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/genutils/importutils/pyimports.py b/polymerist/genutils/importutils/pyimports.py index 4b1789c..36d6d0f 100644 --- a/polymerist/genutils/importutils/pyimports.py +++ b/polymerist/genutils/importutils/pyimports.py @@ -1,5 +1,8 @@ '''For inspecting and managing toplevel imports within Python files and modules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional from types import ModuleType from dataclasses import dataclass, field diff --git a/polymerist/genutils/iteration.py b/polymerist/genutils/iteration.py index fe7ea71..366528b 100644 --- a/polymerist/genutils/iteration.py +++ b/polymerist/genutils/iteration.py @@ -1,5 +1,8 @@ '''Tools for simplifying iteration over collections of items''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, Generator, Iterable, TypeVar, Union from operator import mul diff --git a/polymerist/genutils/logutils/IOHandlers.py b/polymerist/genutils/logutils/IOHandlers.py index 8fc9530..b161e1a 100644 --- a/polymerist/genutils/logutils/IOHandlers.py +++ b/polymerist/genutils/logutils/IOHandlers.py @@ -1,5 +1,8 @@ '''Tools for simplifying logging from multiple sources''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging from logging import Logger from traceback import format_exception diff --git a/polymerist/genutils/logutils/__init__.py b/polymerist/genutils/logutils/__init__.py index 9c22d8d..ce994c6 100644 --- a/polymerist/genutils/logutils/__init__.py +++ b/polymerist/genutils/logutils/__init__.py @@ -1 +1,4 @@ -'''Utilities for 
tracking, wrapping, and redirecting logging output''' \ No newline at end of file +'''Utilities for tracking, wrapping, and redirecting logging output''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/logutils/timestamps.py b/polymerist/genutils/logutils/timestamps.py index cae22a8..3d6ed56 100644 --- a/polymerist/genutils/logutils/timestamps.py +++ b/polymerist/genutils/logutils/timestamps.py @@ -1,5 +1,8 @@ '''Tools for formatting and recording timestamps for events''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union from dataclasses import dataclass diff --git a/polymerist/genutils/sequences/__init__.py b/polymerist/genutils/sequences/__init__.py index 46356c2..bcf0b69 100644 --- a/polymerist/genutils/sequences/__init__.py +++ b/polymerist/genutils/sequences/__init__.py @@ -1,3 +1,6 @@ '''Tools for working with ordered sequences (i.e. collections of elements with definite length and order)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .seqops import * # backwards-compatible change which also makes some utils more easily accessible \ No newline at end of file diff --git a/polymerist/genutils/sequences/discernment/__init__.py b/polymerist/genutils/sequences/discernment/__init__.py index 1175acb..3ecdb6d 100644 --- a/polymerist/genutils/sequences/discernment/__init__.py +++ b/polymerist/genutils/sequences/discernment/__init__.py @@ -5,6 +5,10 @@ Given a "word" (a sequence of N symbols of type T), and a mapped sequence of "bins" (ordered collection of multisets of type T, each assigned a label of type L), enumerate all N-tuples of labels such that the symbols of the words could be drawn from the bins with those labels in that order ''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .enumeration import DISCERNMENTSolver from .strategies import ( DISCERNMENTStrategyStack, diff --git 
a/polymerist/genutils/sequences/discernment/enumeration.py b/polymerist/genutils/sequences/discernment/enumeration.py index 46b9cda..45a3594 100644 --- a/polymerist/genutils/sequences/discernment/enumeration.py +++ b/polymerist/genutils/sequences/discernment/enumeration.py @@ -1,5 +1,8 @@ '''Front-facing solver facade for the DISCERNMENT Problem''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Iterable, Mapping, Sequence, Union from .inventory import SymbolInventory, T, L diff --git a/polymerist/genutils/sequences/discernment/inventory.py b/polymerist/genutils/sequences/discernment/inventory.py index 64a62e7..6a96ade 100644 --- a/polymerist/genutils/sequences/discernment/inventory.py +++ b/polymerist/genutils/sequences/discernment/inventory.py @@ -1,6 +1,9 @@ '''Utilities and type-hinting for creating symbol inventories, which map symbols and bin labels to occurences Useful concrete data structure for representing ordered sequences of symbol multisets for generalized ransom note enumeration''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/genutils/sequences/discernment/strategies.py b/polymerist/genutils/sequences/discernment/strategies.py index 48dfbc4..6c241dc 100644 --- a/polymerist/genutils/sequences/discernment/strategies.py +++ b/polymerist/genutils/sequences/discernment/strategies.py @@ -1,5 +1,8 @@ '''Abstract base and concrete implementations of algorithms which solve the DISCERNMENT Problem''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Generic, ParamSpec, Sequence, TypeVar from abc import ABC, abstractmethod diff --git a/polymerist/genutils/sequences/seqops.py b/polymerist/genutils/sequences/seqops.py index aa6c976..fa36efe 100644 --- a/polymerist/genutils/sequences/seqops.py +++ b/polymerist/genutils/sequences/seqops.py @@ -1,5 
+1,8 @@ '''Generic operations for indexing, generating, and iterating over sequences''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Sequence, TypeVar, Union T = TypeVar('T') # generic type for sequence element S = TypeVar('S') # generic type for a distinct sequence element diff --git a/polymerist/genutils/sequences/similarity/__init__.py b/polymerist/genutils/sequences/similarity/__init__.py index 88514e3..ee72a42 100644 --- a/polymerist/genutils/sequences/similarity/__init__.py +++ b/polymerist/genutils/sequences/similarity/__init__.py @@ -1 +1,4 @@ -'''Methods for evaluating the similarity of and inspecting the edits between sequences''' \ No newline at end of file +'''Methods for evaluating the similarity of and inspecting the edits between sequences''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/sequences/similarity/distances.py b/polymerist/genutils/sequences/similarity/distances.py index b8c56df..5757de2 100644 --- a/polymerist/genutils/sequences/similarity/distances.py +++ b/polymerist/genutils/sequences/similarity/distances.py @@ -1,5 +1,8 @@ '''Implementations of calculation methods for sequence distance ("inverse similarity") metrics''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + # NOTE: much of this could be supplanted in the future by the well-implemented textdistance library (https://github.com/life4/textdistance) from typing import Sequence, TypeVar T = TypeVar('T') diff --git a/polymerist/genutils/sequences/similarity/edits.py b/polymerist/genutils/sequences/similarity/edits.py index f32d16e..60dc8d3 100644 --- a/polymerist/genutils/sequences/similarity/edits.py +++ b/polymerist/genutils/sequences/similarity/edits.py @@ -1,5 +1,8 @@ '''For calculating the edit distance between sequences and inspecting the edits needed to go between them''' +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' + from typing import Any, Generator, Sequence, Type, TypeVar, TypeAlias from dataclasses import dataclass, field, replace T = TypeVar('T') diff --git a/polymerist/genutils/textual/__init__.py b/polymerist/genutils/textual/__init__.py index 1680f38..5a21ad3 100644 --- a/polymerist/genutils/textual/__init__.py +++ b/polymerist/genutils/textual/__init__.py @@ -1,4 +1,7 @@ '''Tools for manipulating, processing, and pretty-printing text from files and string-like objects''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from string import ascii_letters, ascii_lowercase, ascii_uppercase ascii_printable = ''.join(chr(i) for i in range(33, 127)) # ASCII printable characters, minus SPACE (" ", 32) and DELETE (127) \ No newline at end of file diff --git a/polymerist/genutils/textual/casing.py b/polymerist/genutils/textual/casing.py index 741ce17..d6f3578 100644 --- a/polymerist/genutils/textual/casing.py +++ b/polymerist/genutils/textual/casing.py @@ -1,5 +1,8 @@ '''Conversion tools between various programming language cases (https://en.wikipedia.org/wiki/Letter_case#Use_within_programming_languages)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + def snake_case_to_camel_case(varname : str) -> str: '''Convert a name from Snake Case to Camel Case E.g. name_of_a_thing -> NameOfAThing''' diff --git a/polymerist/genutils/textual/delimiters.py b/polymerist/genutils/textual/delimiters.py index 9fa8f91..164bb4d 100644 --- a/polymerist/genutils/textual/delimiters.py +++ b/polymerist/genutils/textual/delimiters.py @@ -1,5 +1,8 @@ '''Validation and parsing of text enclosed by opening and closing delimiters (i.e. 
parentheses-like behavior)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Iterable, Union import re diff --git a/polymerist/genutils/textual/encoding.py b/polymerist/genutils/textual/encoding.py index b407355..0dd300b 100644 --- a/polymerist/genutils/textual/encoding.py +++ b/polymerist/genutils/textual/encoding.py @@ -1,5 +1,8 @@ '''Encoding, hashing, and conversion of string to and from various formats''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import hashlib, base64 diff --git a/polymerist/genutils/textual/interpolation.py b/polymerist/genutils/textual/interpolation.py index 1b8091d..ce14ade 100644 --- a/polymerist/genutils/textual/interpolation.py +++ b/polymerist/genutils/textual/interpolation.py @@ -1,5 +1,8 @@ '''For inserting text into other text in a rules-based manner''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import re diff --git a/polymerist/genutils/textual/ordinals.py b/polymerist/genutils/textual/ordinals.py index e97ae39..0b32686 100644 --- a/polymerist/genutils/textual/ordinals.py +++ b/polymerist/genutils/textual/ordinals.py @@ -1,5 +1,8 @@ '''Tools for converting back and forth between integers and ordinal number words and prefices (https://en.wikipedia.org/wiki/Ordinal_numeral)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + def ordinal_suffix_from_int(n : int) -> str: '''Produce the appropriate word suffix for an integer in sequential order E.g 1 -> "st" as in "first", 17 -> "th" as in "seventeenth, etc.''' diff --git a/polymerist/genutils/textual/prettyprint.py b/polymerist/genutils/textual/prettyprint.py index 5a82c4d..94d9db7 100644 --- a/polymerist/genutils/textual/prettyprint.py +++ b/polymerist/genutils/textual/prettyprint.py @@ -1,5 +1,8 @@ '''For generating human-readable string representations of other Python objects''' +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' + from typing import Any from textwrap import indent diff --git a/polymerist/genutils/textual/strsearch.py b/polymerist/genutils/textual/strsearch.py index 327af3b..919e797 100644 --- a/polymerist/genutils/textual/strsearch.py +++ b/polymerist/genutils/textual/strsearch.py @@ -1,5 +1,8 @@ '''For searching and replacing through strings and text files''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Optional from pathlib import Path diff --git a/polymerist/genutils/trees/__init__.py b/polymerist/genutils/trees/__init__.py index 6e3bfe3..546c778 100644 --- a/polymerist/genutils/trees/__init__.py +++ b/polymerist/genutils/trees/__init__.py @@ -1,5 +1,8 @@ '''Generic functionality for tree-like data structures. Based on the anytree module (https://github.com/c0fec0de/anytree)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .treebase import NodeCorrespondence, compile_tree_factory from .treecopy import get_node_attrs, copy_tree, tree_to_networkx from .treeviz import treestr \ No newline at end of file diff --git a/polymerist/genutils/trees/treebase.py b/polymerist/genutils/trees/treebase.py index 4530480..140b5a8 100644 --- a/polymerist/genutils/trees/treebase.py +++ b/polymerist/genutils/trees/treebase.py @@ -1,5 +1,8 @@ '''Interfaces for encoding arbitrary classes into tree-like data structures''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, Generic, Iterable, Optional, TypeAlias, TypeVar from abc import ABC, abstractmethod diff --git a/polymerist/genutils/trees/treecopy.py b/polymerist/genutils/trees/treecopy.py index a6e1918..6171a40 100644 --- a/polymerist/genutils/trees/treecopy.py +++ b/polymerist/genutils/trees/treecopy.py @@ -1,5 +1,8 @@ '''Tools for copying parts and wholes of trees, at various levels of resolution''' +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' + from typing import Any, Optional from anytree.node import Node diff --git a/polymerist/genutils/trees/treeviz.py b/polymerist/genutils/trees/treeviz.py index f945114..abbff1a 100644 --- a/polymerist/genutils/trees/treeviz.py +++ b/polymerist/genutils/trees/treeviz.py @@ -1,5 +1,8 @@ '''Wrappers for printing out tree-like data structures''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Iterable, TypeVar, Union from anytree import Node diff --git a/polymerist/genutils/typetools/__init__.py b/polymerist/genutils/typetools/__init__.py index d22eb0a..e6121b1 100644 --- a/polymerist/genutils/typetools/__init__.py +++ b/polymerist/genutils/typetools/__init__.py @@ -1 +1,4 @@ -'''Additional type-hinting and typechecking not provided by Python builtins''' \ No newline at end of file +'''Additional type-hinting and typechecking not provided by Python builtins''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/genutils/typetools/categorical.py b/polymerist/genutils/typetools/categorical.py index 3bb57a8..f683a32 100644 --- a/polymerist/genutils/typetools/categorical.py +++ b/polymerist/genutils/typetools/categorical.py @@ -1,5 +1,8 @@ '''Type-hinting for Union-bound categories of types''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, Container, Iterable, Sequence, Type, TypeAlias, Union from numpy import ( ndarray, diff --git a/polymerist/genutils/typetools/numpytypes.py b/polymerist/genutils/typetools/numpytypes.py index 6d231f4..c69abaa 100644 --- a/polymerist/genutils/typetools/numpytypes.py +++ b/polymerist/genutils/typetools/numpytypes.py @@ -1,5 +1,8 @@ '''Type aliases specific to numpy arrays''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Type, TypeAlias, TypeVar, Union import numpy.typing as npt diff --git 
a/polymerist/genutils/typetools/parametric.py b/polymerist/genutils/typetools/parametric.py index 3fef8d1..2af172e 100644 --- a/polymerist/genutils/typetools/parametric.py +++ b/polymerist/genutils/typetools/parametric.py @@ -1,5 +1,8 @@ '''Type aliases for Python callable input parameters''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import TypeVar, ParamSpec, _UnionGenericAlias T = TypeVar('T') # universal generic type diff --git a/polymerist/graphics/__init__.py b/polymerist/graphics/__init__.py index 2a7affd..4f1b037 100644 --- a/polymerist/graphics/__init__.py +++ b/polymerist/graphics/__init__.py @@ -1 +1,4 @@ -'''Utilities for plotting, image manipulation, and colormapping''' \ No newline at end of file +'''Utilities for plotting, image manipulation, and colormapping''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/graphics/colors.py b/polymerist/graphics/colors.py index 02e95f7..2029ca6 100644 --- a/polymerist/graphics/colors.py +++ b/polymerist/graphics/colors.py @@ -1,5 +1,8 @@ '''Representations and conersion methods for colors''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import TypeAlias, Union RGB : TypeAlias = tuple[int, int, int] diff --git a/polymerist/graphics/imageutils.py b/polymerist/graphics/imageutils.py index 690765f..4d8bf71 100644 --- a/polymerist/graphics/imageutils.py +++ b/polymerist/graphics/imageutils.py @@ -1,5 +1,8 @@ '''Tools for editing and manipulating images, and image colors, sizes, and representations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union import numpy as np diff --git a/polymerist/graphics/named_colors.py b/polymerist/graphics/named_colors.py index 63ab349..da92e0c 100644 --- a/polymerist/graphics/named_colors.py +++ b/polymerist/graphics/named_colors.py @@ -1,5 +1,8 @@ '''Particular color values which occur often enough 
or are unique enough to have distinct names''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + BLACK = (0, 0, 0) WHITE = (255, 255, 255) diff --git a/polymerist/graphics/plotutils.py b/polymerist/graphics/plotutils.py index 510661a..bcef118 100644 --- a/polymerist/graphics/plotutils.py +++ b/polymerist/graphics/plotutils.py @@ -1,5 +1,8 @@ '''Tools for generating plots and other graphics''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Iterable, Optional from pathlib import Path from PIL.Image import Image diff --git a/polymerist/maths/__init__.py b/polymerist/maths/__init__.py index 0e28976..65bec0d 100644 --- a/polymerist/maths/__init__.py +++ b/polymerist/maths/__init__.py @@ -1 +1,4 @@ -'''Module for general mathematical operations, calculations, and notation''' \ No newline at end of file +'''Module for general mathematical operations, calculations, and notation''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/maths/combinatorics/__init__.py b/polymerist/maths/combinatorics/__init__.py index cf00ad8..74519a6 100644 --- a/polymerist/maths/combinatorics/__init__.py +++ b/polymerist/maths/combinatorics/__init__.py @@ -1 +1,4 @@ -'''Tools for combinatorial analysis and enumeration''' \ No newline at end of file +'''Tools for combinatorial analysis and enumeration''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/maths/combinatorics/numbers.py b/polymerist/maths/combinatorics/numbers.py index db32ae8..2acef54 100644 --- a/polymerist/maths/combinatorics/numbers.py +++ b/polymerist/maths/combinatorics/numbers.py @@ -1,5 +1,8 @@ '''Utilities for calculating fundamental combinatorial numbers''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Iterable from operator import mul from math import factorial # not worth re-implementing 
here, the C-implementation is plenty fast diff --git a/polymerist/maths/combinatorics/partitions.py b/polymerist/maths/combinatorics/partitions.py index 696f534..f1903ef 100644 --- a/polymerist/maths/combinatorics/partitions.py +++ b/polymerist/maths/combinatorics/partitions.py @@ -1,5 +1,8 @@ '''For explicitly enumerating partitions of sets and multisets''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Sequence diff --git a/polymerist/maths/combinatorics/permutations.py b/polymerist/maths/combinatorics/permutations.py index f9d1459..f93f5aa 100644 --- a/polymerist/maths/combinatorics/permutations.py +++ b/polymerist/maths/combinatorics/permutations.py @@ -1,5 +1,8 @@ '''Utilites for representing pure permutations, cycles, and permutation groups''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Iterable, Optional, Sequence, TypeVar from dataclasses import dataclass, field T = TypeVar('T') diff --git a/polymerist/maths/fractions/__init__.py b/polymerist/maths/fractions/__init__.py index 96562db..80d5a16 100644 --- a/polymerist/maths/fractions/__init__.py +++ b/polymerist/maths/fractions/__init__.py @@ -1 +1,4 @@ -'''Tools for representing and calculating with fractions and other ratio-like objects''' \ No newline at end of file +'''Tools for representing and calculating with fractions and other ratio-like objects''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/maths/fractions/continued.py b/polymerist/maths/fractions/continued.py index 741aa4e..bb03caf 100644 --- a/polymerist/maths/fractions/continued.py +++ b/polymerist/maths/fractions/continued.py @@ -1,5 +1,8 @@ '''Representations and computation methods for continued fractions and ration approximations to real numbers''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union, Generator, Iterable, 
Type, TypeAlias, TypeVar from ...genutils.typetools.numpytypes import Shape, NPInt diff --git a/polymerist/maths/fractions/ratios.py b/polymerist/maths/fractions/ratios.py index caff6ca..e6430e9 100644 --- a/polymerist/maths/fractions/ratios.py +++ b/polymerist/maths/fractions/ratios.py @@ -1,5 +1,8 @@ '''For representing rational numbers, and more general ratios''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from dataclasses import dataclass from typing import Any, Callable, ClassVar, TypeVar N = TypeVar('N') diff --git a/polymerist/maths/greek/__init__.py b/polymerist/maths/greek/__init__.py index 7291966..244ef17 100644 --- a/polymerist/maths/greek/__init__.py +++ b/polymerist/maths/greek/__init__.py @@ -1,4 +1,7 @@ '''Reference for greek letters, prefixes, and other symbols''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .alphabet import GREEK_LETTER_NAMES, GREEK_LOWER, GREEK_UPPER from .prefixes import GREEK_PREFIXES, get_greek_prefix \ No newline at end of file diff --git a/polymerist/maths/greek/alphabet.py b/polymerist/maths/greek/alphabet.py index e44b8f0..c4da25d 100644 --- a/polymerist/maths/greek/alphabet.py +++ b/polymerist/maths/greek/alphabet.py @@ -1,5 +1,8 @@ '''Tabulated references of Greek letter names, prefices, and unicode characters''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + GREEK_LETTER_NAMES = [ # names for greek character literals 'alpha', 'beta', diff --git a/polymerist/maths/greek/prefixes.py b/polymerist/maths/greek/prefixes.py index 01f121a..3e0928f 100644 --- a/polymerist/maths/greek/prefixes.py +++ b/polymerist/maths/greek/prefixes.py @@ -1,5 +1,8 @@ '''Systematic Greek numerical prefixes''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from ..combinatorics.partitions import make_change_greedy # CONSTANT REFERENCE VALUES (adapted from 
https://en.wikipedia.org/wiki/List_of_polygons#Systematic_polygon_names) diff --git a/polymerist/maths/lattices/__init__.py b/polymerist/maths/lattices/__init__.py index 8ed023d..842d8cf 100644 --- a/polymerist/maths/lattices/__init__.py +++ b/polymerist/maths/lattices/__init__.py @@ -1 +1,4 @@ -'''Tools for representing and manipulating coordinate lattices''' \ No newline at end of file +'''Tools for representing and manipulating coordinate lattices''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/maths/lattices/bravais.py b/polymerist/maths/lattices/bravais.py index 898dc59..fda2618 100644 --- a/polymerist/maths/lattices/bravais.py +++ b/polymerist/maths/lattices/bravais.py @@ -1,5 +1,8 @@ '''Representations and calculation methods for crystallographic unit cells and lattice parameters''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, ClassVar, Optional, Type from numbers import Number from ...genutils.typetools.numpytypes import Shape, D diff --git a/polymerist/maths/lattices/coordinates.py b/polymerist/maths/lattices/coordinates.py index eb70689..69738d8 100644 --- a/polymerist/maths/lattices/coordinates.py +++ b/polymerist/maths/lattices/coordinates.py @@ -1,5 +1,8 @@ '''Representation of vectors of coordinates and elementary distance geometry operations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generic, Optional, TypeVar, Union from numbers import Number from ...genutils.typetools.numpytypes import Shape, N, D diff --git a/polymerist/maths/lattices/integral.py b/polymerist/maths/lattices/integral.py index 4f44cf2..7392708 100644 --- a/polymerist/maths/lattices/integral.py +++ b/polymerist/maths/lattices/integral.py @@ -1,5 +1,8 @@ '''Core tools for manipulating integer lattices in D-dimensions''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import 
Iterable from numbers import Number from ...genutils.typetools.numpytypes import Shape, D, N diff --git a/polymerist/maths/linearalg/__init__.py b/polymerist/maths/linearalg/__init__.py index ca34ecb..8ab7e5e 100644 --- a/polymerist/maths/linearalg/__init__.py +++ b/polymerist/maths/linearalg/__init__.py @@ -1 +1,4 @@ -'''Custom linear algebra utilities not already supplied by a numeric processing library like numpy or scipy''' \ No newline at end of file +'''Custom linear algebra utilities not already supplied by a numeric processing library like numpy or scipy''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/maths/linearalg/affine.py b/polymerist/maths/linearalg/affine.py index 48c5686..cdc0cd7 100644 --- a/polymerist/maths/linearalg/affine.py +++ b/polymerist/maths/linearalg/affine.py @@ -1,5 +1,8 @@ '''Utilities to streamline creation of 4x4 affine transformation matrices of 3D linear transformations in homogeneous coordinates''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np import numpy.typing as npt from typing import Annotated, Literal diff --git a/polymerist/maths/linearalg/decomposition.py b/polymerist/maths/linearalg/decomposition.py index 4209c00..68a372c 100644 --- a/polymerist/maths/linearalg/decomposition.py +++ b/polymerist/maths/linearalg/decomposition.py @@ -1,5 +1,8 @@ '''Tools for matrix decomposition''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np from ...genutils.typetools.numpytypes import Shape, DType, N, M diff --git a/polymerist/maths/numbersys/__init__.py b/polymerist/maths/numbersys/__init__.py index 5ce1c8b..40992c9 100644 --- a/polymerist/maths/numbersys/__init__.py +++ b/polymerist/maths/numbersys/__init__.py @@ -1,5 +1,8 @@ '''Implementations of various number systems and representations of numbers''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from 
.commonbases import FixedRadixNumberSystem, FactorialNumberSystem # initialization of some common bases diff --git a/polymerist/maths/numbersys/commonbases.py b/polymerist/maths/numbersys/commonbases.py index cd89eb8..3d8174a 100644 --- a/polymerist/maths/numbersys/commonbases.py +++ b/polymerist/maths/numbersys/commonbases.py @@ -1,5 +1,8 @@ '''Specialized cases of general positional numbering systems which are more common in usage''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Generator, Iterable from .positional import PositionalNumberingSystem diff --git a/polymerist/maths/numbersys/positional.py b/polymerist/maths/numbersys/positional.py index e9b633d..e2b0522 100644 --- a/polymerist/maths/numbersys/positional.py +++ b/polymerist/maths/numbersys/positional.py @@ -1,5 +1,8 @@ '''Conversion tools for representing positive integers in fixed and mixed radix positional bases''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Generator, Sequence, Union from math import inf diff --git a/polymerist/maths/primes.py b/polymerist/maths/primes.py index 7e7c66c..0f54813 100644 --- a/polymerist/maths/primes.py +++ b/polymerist/maths/primes.py @@ -1,5 +1,8 @@ '''Utilities for examining prime numbers and integer factorizations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import TypeAlias from math import sqrt diff --git a/polymerist/maths/statistics.py b/polymerist/maths/statistics.py index 7b1842c..01c403d 100644 --- a/polymerist/maths/statistics.py +++ b/polymerist/maths/statistics.py @@ -1,5 +1,8 @@ '''For computing statistical measures and series' of structured data''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np from dataclasses import dataclass diff --git a/polymerist/mdtools/__init__.py b/polymerist/mdtools/__init__.py index cb78104..50c3719 100644 --- 
a/polymerist/mdtools/__init__.py +++ b/polymerist/mdtools/__init__.py @@ -1 +1,4 @@ -'''Tools for interfacing with and setting up systems for various molecular dynamics packages''' \ No newline at end of file +'''Tools for interfacing with and setting up systems for various molecular dynamics packages''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/lammpstools/__init__.py b/polymerist/mdtools/lammpstools/__init__.py index 64025b8..a7a260e 100644 --- a/polymerist/mdtools/lammpstools/__init__.py +++ b/polymerist/mdtools/lammpstools/__init__.py @@ -1 +1,4 @@ -'''Tools for interfacing with parsing input files from LAMMPS''' \ No newline at end of file +'''Tools for interfacing with parsing input files from LAMMPS''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/lammpstools/lammpseval.py b/polymerist/mdtools/lammpstools/lammpseval.py index 632a0e3..6e213cd 100644 --- a/polymerist/mdtools/lammpstools/lammpseval.py +++ b/polymerist/mdtools/lammpstools/lammpseval.py @@ -1,5 +1,8 @@ '''For gathering information and running calculations from LAMMPS input files''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional, Union import re diff --git a/polymerist/mdtools/lammpstools/unitstyles.py b/polymerist/mdtools/lammpstools/unitstyles.py index d610bb6..bce83c1 100644 --- a/polymerist/mdtools/lammpstools/unitstyles.py +++ b/polymerist/mdtools/lammpstools/unitstyles.py @@ -1,5 +1,8 @@ '''Reference for LAMMPS unit styles, as listed in https://docs.lammps.org/units.html''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar, Optional from dataclasses import dataclass, field from ...genutils.decorators.classmod import register_subclasses diff --git a/polymerist/mdtools/openfftools/__init__.py b/polymerist/mdtools/openfftools/__init__.py index faa9a51..5e86394 
100644 --- a/polymerist/mdtools/openfftools/__init__.py +++ b/polymerist/mdtools/openfftools/__init__.py @@ -1,5 +1,8 @@ '''Tools for manipulating and extending OpenFF objects, and for interfacing with other tools and formats''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any from pathlib import Path diff --git a/polymerist/mdtools/openfftools/boxvectors.py b/polymerist/mdtools/openfftools/boxvectors.py index c5aa5d9..011a810 100644 --- a/polymerist/mdtools/openfftools/boxvectors.py +++ b/polymerist/mdtools/openfftools/boxvectors.py @@ -1,5 +1,8 @@ '''For obtaining, scaling, and manipulating box vectors for Topologies''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Annotated, Literal, TypeAlias, Union import numpy.typing as npt diff --git a/polymerist/mdtools/openfftools/omminter.py b/polymerist/mdtools/openfftools/omminter.py index 5f5cc71..199b239 100644 --- a/polymerist/mdtools/openfftools/omminter.py +++ b/polymerist/mdtools/openfftools/omminter.py @@ -1,5 +1,8 @@ '''For interfacing between OpenFF and OpenMM representations, along with the file analogues''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional, Union from pathlib import Path diff --git a/polymerist/mdtools/openfftools/partialcharge/__init__.py b/polymerist/mdtools/openfftools/partialcharge/__init__.py index 2c52a3d..c5612d8 100644 --- a/polymerist/mdtools/openfftools/partialcharge/__init__.py +++ b/polymerist/mdtools/openfftools/partialcharge/__init__.py @@ -1 +1,4 @@ -'''Tools for systematizing the assignment of partial charges to OpenFF Molecules''' \ No newline at end of file +'''Tools for systematizing the assignment of partial charges to OpenFF Molecules''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py 
b/polymerist/mdtools/openfftools/partialcharge/chargemethods.py index c7c78ca..81b3cae 100644 --- a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py +++ b/polymerist/mdtools/openfftools/partialcharge/chargemethods.py @@ -1,5 +1,8 @@ '''Registry module for keeping track of which partial charging toolkit registries and related methods are available''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Type from collections import defaultdict diff --git a/polymerist/mdtools/openfftools/partialcharge/molchargers.py b/polymerist/mdtools/openfftools/partialcharge/molchargers.py index bb3fdf5..cb8d68e 100644 --- a/polymerist/mdtools/openfftools/partialcharge/molchargers.py +++ b/polymerist/mdtools/openfftools/partialcharge/molchargers.py @@ -1,5 +1,8 @@ '''Classes for partial charge assignment of OpenFF Molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/__init__.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/__init__.py index 68f5953..79802b2 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/__init__.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/__init__.py @@ -1 +1,4 @@ -'''Tools for handling library charges, both for computing from and applying to Molecules''' \ No newline at end of file +'''Tools for handling library charges, both for computing from and applying to Molecules''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/calculation.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/calculation.py index c1f9c8c..f4abee7 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/calculation.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/calculation.py @@ -1,5 +1,8 @@ 
'''Utilities for generating, storing, and applying partial charges to OpenFF Molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py index 6ac82f4..dd168e7 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py @@ -1,5 +1,8 @@ '''Interfaces between residue-charge calculation methods and OpenFF (or other external) tools''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar from dataclasses import dataclass diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/rctypes.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/rctypes.py index 724276b..f455190 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/rctypes.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/rctypes.py @@ -1,5 +1,8 @@ '''Custom types used in determining residue charges''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import TypeAlias from rdkit.Chem.rdchem import Mol from dataclasses import dataclass, field diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/redistribution.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/redistribution.py index 946cae5..fddfe57 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/redistribution.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/redistribution.py @@ -1,5 +1,8 @@ '''Strategies for redistribution excess partial charge among residues''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from dataclasses import dataclass from abc import ABC, abstractmethod from rdkit.Chem import Mol diff --git 
a/polymerist/mdtools/openfftools/partition.py b/polymerist/mdtools/openfftools/partition.py index 64cbff3..185914d 100644 --- a/polymerist/mdtools/openfftools/partition.py +++ b/polymerist/mdtools/openfftools/partition.py @@ -3,6 +3,7 @@ Code derived from original implementation and conceptualization by Connor Davel (https://github.com/openforcefield/polymer_examples/blob/main/monomer_generation/partition.py) ''' __author__ = 'Connor Davel, Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' from typing import TypeAlias diff --git a/polymerist/mdtools/openfftools/solvation/__init__.py b/polymerist/mdtools/openfftools/solvation/__init__.py index bae66c3..1b04bab 100644 --- a/polymerist/mdtools/openfftools/solvation/__init__.py +++ b/polymerist/mdtools/openfftools/solvation/__init__.py @@ -1 +1,4 @@ -'''Tools for adding solvent to a Topology''' \ No newline at end of file +'''Tools for adding solvent to a Topology''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/openfftools/solvation/packing.py b/polymerist/mdtools/openfftools/solvation/packing.py index 768570a..b88c8ce 100644 --- a/polymerist/mdtools/openfftools/solvation/packing.py +++ b/polymerist/mdtools/openfftools/solvation/packing.py @@ -1,5 +1,8 @@ '''For packing solvents into Topology boxes using packmol''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openfftools/solvation/physprops.py b/polymerist/mdtools/openfftools/solvation/physprops.py index eb75c0e..5db9a28 100644 --- a/polymerist/mdtools/openfftools/solvation/physprops.py +++ b/polymerist/mdtools/openfftools/solvation/physprops.py @@ -1,5 +1,8 @@ '''For converting macroscopic parameters (such as concentration, bulk density, etc) into microscopic parameters for simulations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import 
Union from math import ceil diff --git a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py index 20e3a8d..f540da0 100644 --- a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py +++ b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py @@ -1,5 +1,8 @@ '''For curating pre-defined solvent molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from pathlib import Path # TODO : reimplement "properly" "using importlib_resources (more complicated than it's worth for now) _MODULE_PATH = Path(__path__[0]) diff --git a/polymerist/mdtools/openfftools/topology.py b/polymerist/mdtools/openfftools/topology.py index 4202007..210beb7 100644 --- a/polymerist/mdtools/openfftools/topology.py +++ b/polymerist/mdtools/openfftools/topology.py @@ -1,5 +1,8 @@ '''Utilities for serializing, converting, and extracting info from OpenFF topologies''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openmmtools/__init__.py b/polymerist/mdtools/openmmtools/__init__.py index a9bd94d..a992c19 100644 --- a/polymerist/mdtools/openmmtools/__init__.py +++ b/polymerist/mdtools/openmmtools/__init__.py @@ -1 +1,4 @@ -'''Tools for setting up, saving, loading, documenting, and running OpenMM molecular dynamics simulations''' \ No newline at end of file +'''Tools for setting up, saving, loading, documenting, and running OpenMM molecular dynamics simulations''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/openmmtools/description.py b/polymerist/mdtools/openmmtools/description.py index f5578b0..cf5393f 100644 --- a/polymerist/mdtools/openmmtools/description.py +++ b/polymerist/mdtools/openmmtools/description.py @@ -1,5 +1,8 @@ '''Utilities from describing the various parameters and settings possessed by 
OpenMM objects''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Union from openmm import NonbondedForce, System diff --git a/polymerist/mdtools/openmmtools/evaluation.py b/polymerist/mdtools/openmmtools/evaluation.py index 2e5f53b..6a2f3f1 100644 --- a/polymerist/mdtools/openmmtools/evaluation.py +++ b/polymerist/mdtools/openmmtools/evaluation.py @@ -1,5 +1,8 @@ '''For extracting properties from OpenMM Contexts (e.g. positions, energies, etc)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional from openmm import Context diff --git a/polymerist/mdtools/openmmtools/execution.py b/polymerist/mdtools/openmmtools/execution.py index 6739cd5..5120e49 100644 --- a/polymerist/mdtools/openmmtools/execution.py +++ b/polymerist/mdtools/openmmtools/execution.py @@ -1,5 +1,8 @@ '''For running OpenMM simulations and extracting information from them''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openmmtools/forcegroups.py b/polymerist/mdtools/openmmtools/forcegroups.py index 6416c98..62fdaf3 100644 --- a/polymerist/mdtools/openmmtools/forcegroups.py +++ b/polymerist/mdtools/openmmtools/forcegroups.py @@ -1,5 +1,8 @@ '''Tools for labelling and extracting force groups for Forces within an OpenMM System''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Union from collections import defaultdict from openmm import Force, NonbondedForce, System diff --git a/polymerist/mdtools/openmmtools/parameters.py b/polymerist/mdtools/openmmtools/parameters.py index c0b4d1f..329cb3f 100644 --- a/polymerist/mdtools/openmmtools/parameters.py +++ b/polymerist/mdtools/openmmtools/parameters.py @@ -1,5 +1,8 @@ '''For recording, storing, and organizing parameters associated wtih a Simulation''' +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' + from dataclasses import dataclass, field from typing import Any diff --git a/polymerist/mdtools/openmmtools/preparation.py b/polymerist/mdtools/openmmtools/preparation.py index 92fe850..3468cf9 100644 --- a/polymerist/mdtools/openmmtools/preparation.py +++ b/polymerist/mdtools/openmmtools/preparation.py @@ -1,5 +1,8 @@ '''Boilerplate for setting up OpenMM Simulations and related files''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openmmtools/reporters.py b/polymerist/mdtools/openmmtools/reporters.py index 2ad6f3c..9f880fc 100644 --- a/polymerist/mdtools/openmmtools/reporters.py +++ b/polymerist/mdtools/openmmtools/reporters.py @@ -1,5 +1,8 @@ '''Utilities for handlling setup of Simulation Reporters''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openmmtools/serialization.py b/polymerist/mdtools/openmmtools/serialization.py index 36ed802..6bad38a 100644 --- a/polymerist/mdtools/openmmtools/serialization.py +++ b/polymerist/mdtools/openmmtools/serialization.py @@ -1,5 +1,8 @@ '''For reading and writing OpenMM components to files''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/mdtools/openmmtools/thermo.py b/polymerist/mdtools/openmmtools/thermo.py index 8d1b3b5..ce5ef22 100644 --- a/polymerist/mdtools/openmmtools/thermo.py +++ b/polymerist/mdtools/openmmtools/thermo.py @@ -1,5 +1,8 @@ '''Simplifies creation of Simulations which correspond to a particular thermodynamic ensembles''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/polymerist.py b/polymerist/polymerist.py index b062e21..3bd3ea1 100644 --- 
a/polymerist/polymerist.py +++ b/polymerist/polymerist.py @@ -1,5 +1,7 @@ """Provide the primary functions.""" +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' def canvas(with_attribution=True): """ diff --git a/polymerist/polymers/__init__.py b/polymerist/polymers/__init__.py index d5bc066..a36e5b2 100644 --- a/polymerist/polymers/__init__.py +++ b/polymerist/polymers/__init__.py @@ -1 +1,4 @@ -'''Utilities for representing, building, and esimating chain proerties of polymers''' \ No newline at end of file +'''Utilities for representing, building, and esimating chain proerties of polymers''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 8e9f703..e7898c3 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -1,5 +1,8 @@ '''Utilities for building new polymer structures; currently limited to linear polymers and PDB save format''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/polymers/estimation.py b/polymerist/polymers/estimation.py index 4bcc695..fa56799 100644 --- a/polymerist/polymers/estimation.py +++ b/polymerist/polymers/estimation.py @@ -1,5 +1,8 @@ '''For estimating properties of chains based on their constituent monomers and chain info''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np from rdkit import Chem diff --git a/polymerist/polymers/exceptions.py b/polymerist/polymers/exceptions.py index 73c512e..e7b5625 100644 --- a/polymerist/polymers/exceptions.py +++ b/polymerist/polymers/exceptions.py @@ -1,5 +1,8 @@ '''Custom Exceptions specific to Polymers and related objects''' # TODO: go through these and purge errors which are no longer relevant (ported from polysaccharide v1) +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' + class SubstructMatchFailedError(Exception): '''Raised when molecule graph isomorphism match does not form a cover''' diff --git a/polymerist/polymers/monographs.py b/polymerist/polymers/monographs.py index c52e53c..1c7bbe0 100644 --- a/polymerist/polymers/monographs.py +++ b/polymerist/polymers/monographs.py @@ -1,5 +1,8 @@ '''Tools for generating and manipulating monomer connectivity graphs''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, ClassVar, Generator, Optional, Sequence, Union import networkx as nx diff --git a/polymerist/polymers/monomers/__init__.py b/polymerist/polymers/monomers/__init__.py index 4003766..176a2ff 100644 --- a/polymerist/polymers/monomers/__init__.py +++ b/polymerist/polymers/monomers/__init__.py @@ -1,3 +1,6 @@ '''For representing, generating, and modifying information about groups of monomer fragments''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .repr import MonomerGroup # make monomer representation available at the module level \ No newline at end of file diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index fd23059..f8ebf62 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -1,5 +1,8 @@ '''For representing monomer information''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Optional, TypeAlias, Union from dataclasses import dataclass, field diff --git a/polymerist/polymers/monomers/specification.py b/polymerist/polymers/monomers/specification.py index 5a8a936..0731105 100644 --- a/polymerist/polymers/monomers/specification.py +++ b/polymerist/polymers/monomers/specification.py @@ -1,5 +1,8 @@ '''Implementations of the canonical monomer substructure SMARTS specification defined in https://doi.org/10.26434/chemrxiv-2023-f2zxd-v2''' +__author__ = 'Timotej Bernat' 
+__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/polymers/smidgelib/__init__.py b/polymerist/polymers/smidgelib/__init__.py index 555d8fd..2fa705b 100644 --- a/polymerist/polymers/smidgelib/__init__.py +++ b/polymerist/polymers/smidgelib/__init__.py @@ -1,5 +1,8 @@ '''Utilities for parsing, validating, and translating SMILES-like Monomer Interconnectivity and Degree Graph Encoding (SMIDGE) string''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import re from rdkit import Chem diff --git a/polymerist/polymers/smidgelib/smidgebonds.py b/polymerist/polymers/smidgelib/smidgebonds.py index 2195e9c..fe3d4a3 100644 --- a/polymerist/polymers/smidgelib/smidgebonds.py +++ b/polymerist/polymers/smidgelib/smidgebonds.py @@ -1,5 +1,8 @@ '''Representation classes for monomer graphs bonds which encode intermonomer Port information''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import re from rdkit import Chem diff --git a/polymerist/polymers/smidgelib/smidgeread.py b/polymerist/polymers/smidgelib/smidgeread.py index 84e9e94..09b9f84 100644 --- a/polymerist/polymers/smidgelib/smidgeread.py +++ b/polymerist/polymers/smidgelib/smidgeread.py @@ -1,5 +1,8 @@ '''Automata for reading SMIDGE strings into their graph representations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/polymers/smidgelib/smidgewrite.py b/polymerist/polymers/smidgelib/smidgewrite.py index 694c511..2e3e949 100644 --- a/polymerist/polymers/smidgelib/smidgewrite.py +++ b/polymerist/polymers/smidgelib/smidgewrite.py @@ -1,5 +1,8 @@ '''Automata for reading SMIDGE strings into their graph representations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/rdutils/__init__.py 
b/polymerist/rdutils/__init__.py index fd9e680..d5a1403 100644 --- a/polymerist/rdutils/__init__.py +++ b/polymerist/rdutils/__init__.py @@ -1 +1,4 @@ -'''Utilities for generating, labelling, editing, and transforming RDKit molecules and other RDKit objects''' \ No newline at end of file +'''Utilities for generating, labelling, editing, and transforming RDKit molecules and other RDKit objects''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/rdutils/bonding/__init__.py b/polymerist/rdutils/bonding/__init__.py index 4ece735..ec0829d 100644 --- a/polymerist/rdutils/bonding/__init__.py +++ b/polymerist/rdutils/bonding/__init__.py @@ -1,3 +1,6 @@ '''Tools for joining together and splitting RDKit molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from ._bonding import combined_rdmol \ No newline at end of file diff --git a/polymerist/rdutils/bonding/_bonding.py b/polymerist/rdutils/bonding/_bonding.py index b3c6fce..b33390b 100644 --- a/polymerist/rdutils/bonding/_bonding.py +++ b/polymerist/rdutils/bonding/_bonding.py @@ -1,5 +1,8 @@ '''Base utilities and exceptions used throughout the bonding module''' # TODO : deprecate this module +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/rdutils/bonding/dissolution.py b/polymerist/rdutils/bonding/dissolution.py index 74b72fc..07f3693 100644 --- a/polymerist/rdutils/bonding/dissolution.py +++ b/polymerist/rdutils/bonding/dissolution.py @@ -1,5 +1,8 @@ '''Tools for breaking bonds in RDKit Mols and assigning new Ports''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/rdutils/bonding/formation.py b/polymerist/rdutils/bonding/formation.py index 8fee508..c0114e2 100644 --- a/polymerist/rdutils/bonding/formation.py +++ 
b/polymerist/rdutils/bonding/formation.py @@ -1,5 +1,8 @@ '''Tools for creating new bonds from free Ports in RDKit Mols''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/rdutils/bonding/identification.py b/polymerist/rdutils/bonding/identification.py index 36dc412..dbfb4a1 100644 --- a/polymerist/rdutils/bonding/identification.py +++ b/polymerist/rdutils/bonding/identification.py @@ -1,5 +1,8 @@ '''Tools for determining how many and which bondable ports are in an RDKit Mol''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Iterable, Optional from itertools import ( combinations, diff --git a/polymerist/rdutils/bonding/permutation.py b/polymerist/rdutils/bonding/permutation.py index 84bc51b..35aa320 100644 --- a/polymerist/rdutils/bonding/permutation.py +++ b/polymerist/rdutils/bonding/permutation.py @@ -1,5 +1,8 @@ '''Tool for swapping bonds within and between RDKit Mols''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/rdutils/bonding/portlib.py b/polymerist/rdutils/bonding/portlib.py index 381eaf8..1fdec70 100644 --- a/polymerist/rdutils/bonding/portlib.py +++ b/polymerist/rdutils/bonding/portlib.py @@ -1,5 +1,8 @@ '''Specification for representing, defining, and characterizing selective intermolecular bond placeholders ("ports")''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar, Generator, Optional from dataclasses import dataclass, field diff --git a/polymerist/rdutils/bonding/substitution.py b/polymerist/rdutils/bonding/substitution.py index 75ec575..b30051d 100644 --- a/polymerist/rdutils/bonding/substitution.py +++ b/polymerist/rdutils/bonding/substitution.py @@ -1,5 +1,8 @@ '''Tools for replacing Ports and functional groups''' +__author__ 
= 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional from rdkit import Chem diff --git a/polymerist/rdutils/labeling/__init__.py b/polymerist/rdutils/labeling/__init__.py index 0f331c6..6b067d6 100644 --- a/polymerist/rdutils/labeling/__init__.py +++ b/polymerist/rdutils/labeling/__init__.py @@ -1 +1,4 @@ -'''Tools for assigning numeric and string labels to RDKit atoms, bonds, and molecules''' \ No newline at end of file +'''Tools for assigning numeric and string labels to RDKit atoms, bonds, and molecules''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/rdutils/labeling/atomwise.py b/polymerist/rdutils/labeling/atomwise.py index f391840..90af42f 100644 --- a/polymerist/rdutils/labeling/atomwise.py +++ b/polymerist/rdutils/labeling/atomwise.py @@ -1,5 +1,8 @@ '''For obtaining info from and for labelling individual RDKit Atoms''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, Generator from rdkit.Chem.rdchem import Atom diff --git a/polymerist/rdutils/labeling/bijection.py b/polymerist/rdutils/labeling/bijection.py index f346def..c5b1132 100644 --- a/polymerist/rdutils/labeling/bijection.py +++ b/polymerist/rdutils/labeling/bijection.py @@ -1,5 +1,8 @@ '''For mapping 1-to-1 between two allegedly identical molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator from rdkit.Chem.rdchem import Atom, Mol diff --git a/polymerist/rdutils/labeling/bondwise.py b/polymerist/rdutils/labeling/bondwise.py index 8632d3b..1222678 100644 --- a/polymerist/rdutils/labeling/bondwise.py +++ b/polymerist/rdutils/labeling/bondwise.py @@ -1,5 +1,8 @@ '''For obtaining info from and for labelling individual RDKit Bonds''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Callable, Iterable, Optional from itertools import 
combinations diff --git a/polymerist/rdutils/labeling/molwise.py b/polymerist/rdutils/labeling/molwise.py index 1a1a7dd..d920ecc 100644 --- a/polymerist/rdutils/labeling/molwise.py +++ b/polymerist/rdutils/labeling/molwise.py @@ -1,5 +1,8 @@ '''For reading, writing, and clearing labels from all Atoms and/or Bonds in an RDKit molecule''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, Iterable, Optional, Union from rdkit.Chem.rdchem import Mol diff --git a/polymerist/rdutils/rdconvert.py b/polymerist/rdutils/rdconvert.py index 8c59d23..89df721 100644 --- a/polymerist/rdutils/rdconvert.py +++ b/polymerist/rdutils/rdconvert.py @@ -1,5 +1,8 @@ '''For conversion of RDKit Mols back and forth between different format encodings - often imbues a desired side effect (such as 2D-projection)''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from abc import ABC, abstractmethod, abstractproperty from rdkit import Chem from rdkit.Chem.rdchem import Mol diff --git a/polymerist/rdutils/rdcoords/__init__.py b/polymerist/rdutils/rdcoords/__init__.py index e567589..5c79be8 100644 --- a/polymerist/rdutils/rdcoords/__init__.py +++ b/polymerist/rdutils/rdcoords/__init__.py @@ -1 +1,4 @@ -'''RDKit Mol coordinate generation and coordinate transforms''' \ No newline at end of file +'''RDKit Mol coordinate generation and coordinate transforms''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/rdutils/rdcoords/tiling.py b/polymerist/rdutils/rdcoords/tiling.py index 8330fa3..a025dd4 100644 --- a/polymerist/rdutils/rdcoords/tiling.py +++ b/polymerist/rdutils/rdcoords/tiling.py @@ -1,5 +1,8 @@ '''For generating periodically-tiled topologies from RDKit Mols''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import numpy as np from rdkit import Chem diff --git a/polymerist/rdutils/rderrors.py b/polymerist/rdutils/rderrors.py index 
f9420a6..f45cbab 100644 --- a/polymerist/rdutils/rderrors.py +++ b/polymerist/rdutils/rderrors.py @@ -1,6 +1,9 @@ '''Custom exceptions specific to RDKit-related functionality''' -# TODO : consider divvying up this module down if few other modules import individual errors +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +# TODO : consider divvying up this module down if few other modules import individual errors class SubstructMatchFailedError(Exception): '''Raised when molecule graph isomorphism match does not form a cover''' pass diff --git a/polymerist/rdutils/rdgraphs.py b/polymerist/rdutils/rdgraphs.py index 16787a0..a8c1732 100644 --- a/polymerist/rdutils/rdgraphs.py +++ b/polymerist/rdutils/rdgraphs.py @@ -1,5 +1,8 @@ '''Utilities for interfacing between RDKit Mols and their graph representations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Iterable from rdkit import Chem diff --git a/polymerist/rdutils/rdkdraw.py b/polymerist/rdutils/rdkdraw.py index 9d6f3eb..ae11fbc 100644 --- a/polymerist/rdutils/rdkdraw.py +++ b/polymerist/rdutils/rdkdraw.py @@ -1,5 +1,8 @@ '''Tools for drawing and visulaizing RDKit molecules''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional, Type, Union import PIL diff --git a/polymerist/rdutils/rdprops.py b/polymerist/rdutils/rdprops.py index 1d14975..4f74dcf 100644 --- a/polymerist/rdutils/rdprops.py +++ b/polymerist/rdutils/rdprops.py @@ -1,5 +1,8 @@ '''For assigning, transferring, and removing properties of RDKit objects''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Optional, TypeVar, TypeAlias, Union from copy import deepcopy diff --git a/polymerist/rdutils/reactions/__init__.py b/polymerist/rdutils/reactions/__init__.py index 09dc3c0..47920de 100644 --- a/polymerist/rdutils/reactions/__init__.py +++ b/polymerist/rdutils/reactions/__init__.py 
@@ -1 +1,4 @@ -'''Utilities for representing and modelling chemical reactions between RDKit molecules''' \ No newline at end of file +'''Utilities for representing and modelling chemical reactions between RDKit molecules''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/rdutils/reactions/assembly.py b/polymerist/rdutils/reactions/assembly.py index a97e570..99d418f 100644 --- a/polymerist/rdutils/reactions/assembly.py +++ b/polymerist/rdutils/reactions/assembly.py @@ -1,5 +1,8 @@ '''Tools for simplifying the construction of reaction templates''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from dataclasses import dataclass, field from typing import Iterable, Optional, Union diff --git a/polymerist/rdutils/reactions/fragment.py b/polymerist/rdutils/reactions/fragment.py index f515df7..3a8068e 100644 --- a/polymerist/rdutils/reactions/fragment.py +++ b/polymerist/rdutils/reactions/fragment.py @@ -1,5 +1,8 @@ '''For fragmenting molecules by reaction and residue information''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator from abc import ABC, abstractmethod from itertools import combinations diff --git a/polymerist/rdutils/reactions/reactexc.py b/polymerist/rdutils/reactions/reactexc.py index 8461238..2444986 100644 --- a/polymerist/rdutils/reactions/reactexc.py +++ b/polymerist/rdutils/reactions/reactexc.py @@ -1,5 +1,8 @@ '''Exceptions specific to reactions''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + class BadNumberReactants(Exception): '''To be raised when too many or too few Mols are provided than expected''' pass diff --git a/polymerist/rdutils/reactions/reactions.py b/polymerist/rdutils/reactions/reactions.py index 28b559b..ff3a495 100644 --- a/polymerist/rdutils/reactions/reactions.py +++ b/polymerist/rdutils/reactions/reactions.py @@ -1,5 +1,8 @@ '''Classes for representing information 
about reaction mechanisms and tracing bonds and atoms along a reaction''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar, Iterable, Optional, Sequence, Union from dataclasses import dataclass, field diff --git a/polymerist/rdutils/reactions/reactors.py b/polymerist/rdutils/reactions/reactors.py index 5813fc5..30b96c3 100644 --- a/polymerist/rdutils/reactions/reactors.py +++ b/polymerist/rdutils/reactions/reactors.py @@ -1,5 +1,8 @@ '''Classes for implementing reactions with respect to some set of reactant RDKit Mols''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import ClassVar, Generator, Iterable, Optional, Type from dataclasses import dataclass, field from itertools import chain diff --git a/polymerist/smileslib/__init__.py b/polymerist/smileslib/__init__.py index 2d5c23b..647f88b 100644 --- a/polymerist/smileslib/__init__.py +++ b/polymerist/smileslib/__init__.py @@ -1,3 +1,6 @@ '''General-purpose utilities related to SMILES and SMARTS string manipulations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from .primitives import is_valid_SMILES, is_valid_SMARTS \ No newline at end of file diff --git a/polymerist/smileslib/chemdbqueries.py b/polymerist/smileslib/chemdbqueries.py index a54d26b..5b3c480 100644 --- a/polymerist/smileslib/chemdbqueries.py +++ b/polymerist/smileslib/chemdbqueries.py @@ -1,5 +1,8 @@ '''For querying chemical databases for information about molecules specified by SMILES string and other structures''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/smileslib/functgroups/__init__.py b/polymerist/smileslib/functgroups/__init__.py index 3c02dc7..337878b 100644 --- a/polymerist/smileslib/functgroups/__init__.py +++ b/polymerist/smileslib/functgroups/__init__.py @@ -1,5 +1,8 @@ '''SMARTS-based queries for 
functional groups and other chemical signatures''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + # Creating module-specific logger import logging LOGGER = logging.getLogger(__name__) diff --git a/polymerist/smileslib/functgroups/_daylight_scrape.py b/polymerist/smileslib/functgroups/_daylight_scrape.py index 5ea9369..7100a97 100644 --- a/polymerist/smileslib/functgroups/_daylight_scrape.py +++ b/polymerist/smileslib/functgroups/_daylight_scrape.py @@ -1,5 +1,8 @@ '''Backend web-scraping to (re)build SMARTS lookup table from the Daylight SMARTS official site''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from dataclasses import dataclass import requests diff --git a/polymerist/smileslib/primitives.py b/polymerist/smileslib/primitives.py index f192b16..0134f3c 100644 --- a/polymerist/smileslib/primitives.py +++ b/polymerist/smileslib/primitives.py @@ -1,5 +1,8 @@ '''SMILES and SMARTS primitives and functions for validation''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from rdkit import Chem from rdkit.Chem.rdchem import BondType diff --git a/polymerist/smileslib/substructures.py b/polymerist/smileslib/substructures.py index e8a1ba8..7a3d9a2 100644 --- a/polymerist/smileslib/substructures.py +++ b/polymerist/smileslib/substructures.py @@ -1,5 +1,8 @@ '''Utilities related to handling SMARTS queries''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Generator, TypeVar T = TypeVar('T') diff --git a/polymerist/tests/__init__.py b/polymerist/tests/__init__.py index e131175..684fb22 100644 --- a/polymerist/tests/__init__.py +++ b/polymerist/tests/__init__.py @@ -1,3 +1,6 @@ """ Empty init file in case you choose a package besides PyTest such as Nose which may look for such a file. 
""" + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/analysis/__init__.py b/polymerist/tests/analysis/__init__.py index ba40be5..3e9bd52 100644 --- a/polymerist/tests/analysis/__init__.py +++ b/polymerist/tests/analysis/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `analysis` package''' \ No newline at end of file +'''Unit tests for `analysis` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/data/__init__.py b/polymerist/tests/data/__init__.py index b591267..fe4e898 100644 --- a/polymerist/tests/data/__init__.py +++ b/polymerist/tests/data/__init__.py @@ -1 +1,4 @@ -'''Reference data used to load or verify unit tests''' \ No newline at end of file +'''Reference data used to load or verify unit tests''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/__init__.py b/polymerist/tests/genutils/__init__.py index 954d19f..1682af2 100644 --- a/polymerist/tests/genutils/__init__.py +++ b/polymerist/tests/genutils/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `genutils` package''' \ No newline at end of file +'''Unit tests for `genutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/decorators/__init__.py b/polymerist/tests/genutils/decorators/__init__.py index 1573261..b477cf7 100644 --- a/polymerist/tests/genutils/decorators/__init__.py +++ b/polymerist/tests/genutils/decorators/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `decorators` package''' \ No newline at end of file +'''Unit tests for `decorators` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/fileutils/__init__.py b/polymerist/tests/genutils/fileutils/__init__.py index 7f49e8c..eeab20a 100644 --- a/polymerist/tests/genutils/fileutils/__init__.py +++ 
b/polymerist/tests/genutils/fileutils/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `fileutils` package''' \ No newline at end of file +'''Unit tests for `fileutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/fileutils/jsonio/__init__.py b/polymerist/tests/genutils/fileutils/jsonio/__init__.py index 6bf0714..3076f0f 100644 --- a/polymerist/tests/genutils/fileutils/jsonio/__init__.py +++ b/polymerist/tests/genutils/fileutils/jsonio/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `jsonio` package''' \ No newline at end of file +'''Unit tests for `jsonio` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/fileutils/test_filetree.py b/polymerist/tests/genutils/fileutils/test_filetree.py index d220959..aacf093 100644 --- a/polymerist/tests/genutils/fileutils/test_filetree.py +++ b/polymerist/tests/genutils/fileutils/test_filetree.py @@ -1,5 +1,8 @@ '''Unit tests for filetree operations''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import pytest from pathlib import Path from anytree.iterators import PreOrderIter, LevelOrderGroupIter diff --git a/polymerist/tests/genutils/logutils/__init__.py b/polymerist/tests/genutils/logutils/__init__.py index 88e053e..a9a0adb 100644 --- a/polymerist/tests/genutils/logutils/__init__.py +++ b/polymerist/tests/genutils/logutils/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `logutils` package''' \ No newline at end of file +'''Unit tests for `logutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/sequences/__init__.py b/polymerist/tests/genutils/sequences/__init__.py index c754f33..487bc78 100644 --- a/polymerist/tests/genutils/sequences/__init__.py +++ b/polymerist/tests/genutils/sequences/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `sequences` package''' \ No newline at end 
of file +'''Unit tests for `sequences` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/sequences/discernment/__init__.py b/polymerist/tests/genutils/sequences/discernment/__init__.py index 6cee0a6..5c0dc4e 100644 --- a/polymerist/tests/genutils/sequences/discernment/__init__.py +++ b/polymerist/tests/genutils/sequences/discernment/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `discernment` package''' \ No newline at end of file +'''Unit tests for `discernment` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/sequences/discernment/test_discernment.py b/polymerist/tests/genutils/sequences/discernment/test_discernment.py index 728a362..c329da7 100644 --- a/polymerist/tests/genutils/sequences/discernment/test_discernment.py +++ b/polymerist/tests/genutils/sequences/discernment/test_discernment.py @@ -1,5 +1,8 @@ '''Unit tests for DISCERNMENT-related functionality''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import pytest from polymerist.genutils.importutils.pkginspect import get_file_path_within_package from polymerist.tests import data as testdata diff --git a/polymerist/tests/genutils/sequences/similarity/__init__.py b/polymerist/tests/genutils/sequences/similarity/__init__.py index dea3ddf..ca258ec 100644 --- a/polymerist/tests/genutils/sequences/similarity/__init__.py +++ b/polymerist/tests/genutils/sequences/similarity/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `similarity` package''' \ No newline at end of file +'''Unit tests for `similarity` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/test_attrs.py b/polymerist/tests/genutils/test_attrs.py index c594bf0..5f1ac8f 100644 --- a/polymerist/tests/genutils/test_attrs.py +++ b/polymerist/tests/genutils/test_attrs.py @@ -1,5 +1,8 @@ '''Unit test for 
attribute inspection''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from polymerist.genutils.attrs import compile_argfree_getable_attrs import pytest diff --git a/polymerist/tests/genutils/test_pkginspect.py b/polymerist/tests/genutils/test_pkginspect.py index 6a6e576..ea07262 100644 --- a/polymerist/tests/genutils/test_pkginspect.py +++ b/polymerist/tests/genutils/test_pkginspect.py @@ -1,5 +1,8 @@ '''Unit tests for package inspection utilities''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from types import ModuleType import pytest diff --git a/polymerist/tests/genutils/textual/__init__.py b/polymerist/tests/genutils/textual/__init__.py index 08f7241..e722b60 100644 --- a/polymerist/tests/genutils/textual/__init__.py +++ b/polymerist/tests/genutils/textual/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `textual` package''' \ No newline at end of file +'''Unit tests for `textual` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/trees/__init__.py b/polymerist/tests/genutils/trees/__init__.py index ff0aa15..b2a880c 100644 --- a/polymerist/tests/genutils/trees/__init__.py +++ b/polymerist/tests/genutils/trees/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `trees` package''' \ No newline at end of file +'''Unit tests for `trees` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/trees/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py index fdfe98e..44e408b 100644 --- a/polymerist/tests/genutils/trees/test_trees.py +++ b/polymerist/tests/genutils/trees/test_trees.py @@ -1,5 +1,8 @@ '''Unit tests for trees''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any import pytest diff --git a/polymerist/tests/genutils/typetools/__init__.py b/polymerist/tests/genutils/typetools/__init__.py index 
30fee19..c2e840a 100644 --- a/polymerist/tests/genutils/typetools/__init__.py +++ b/polymerist/tests/genutils/typetools/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `typetools` package''' \ No newline at end of file +'''Unit tests for `typetools` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/graphics/__init__.py b/polymerist/tests/graphics/__init__.py index 245b172..e9b9a2f 100644 --- a/polymerist/tests/graphics/__init__.py +++ b/polymerist/tests/graphics/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `graphics` package''' \ No newline at end of file +'''Unit tests for `graphics` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/__init__.py b/polymerist/tests/maths/__init__.py index 3a4fe3d..b8a824f 100644 --- a/polymerist/tests/maths/__init__.py +++ b/polymerist/tests/maths/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `maths` package''' \ No newline at end of file +'''Unit tests for `maths` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/combinatorics/__init__.py b/polymerist/tests/maths/combinatorics/__init__.py index cd69607..9bd2cae 100644 --- a/polymerist/tests/maths/combinatorics/__init__.py +++ b/polymerist/tests/maths/combinatorics/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `combinatorics` package''' \ No newline at end of file +'''Unit tests for `combinatorics` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/fractions/__init__.py b/polymerist/tests/maths/fractions/__init__.py index 6f0d592..686dec7 100644 --- a/polymerist/tests/maths/fractions/__init__.py +++ b/polymerist/tests/maths/fractions/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `fractions` package''' \ No newline at end of file +'''Unit tests for `fractions` package''' + +__author__ = 'Timotej Bernat' 
+__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/greek/__init__.py b/polymerist/tests/maths/greek/__init__.py index 2abe4a8..94177f7 100644 --- a/polymerist/tests/maths/greek/__init__.py +++ b/polymerist/tests/maths/greek/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `greek` package''' \ No newline at end of file +'''Unit tests for `greek` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/lattices/__init__.py b/polymerist/tests/maths/lattices/__init__.py index d23663d..a190931 100644 --- a/polymerist/tests/maths/lattices/__init__.py +++ b/polymerist/tests/maths/lattices/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `lattices` package''' \ No newline at end of file +'''Unit tests for `lattices` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/linearalg/__init__.py b/polymerist/tests/maths/linearalg/__init__.py index 79f09b2..e8c1721 100644 --- a/polymerist/tests/maths/linearalg/__init__.py +++ b/polymerist/tests/maths/linearalg/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `linearalg` package''' \ No newline at end of file +'''Unit tests for `linearalg` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/maths/numbersys/__init__.py b/polymerist/tests/maths/numbersys/__init__.py index 3c95aa1..0c2f73f 100644 --- a/polymerist/tests/maths/numbersys/__init__.py +++ b/polymerist/tests/maths/numbersys/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `numbersys` package''' \ No newline at end of file +'''Unit tests for `numbersys` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/__init__.py b/polymerist/tests/mdtools/__init__.py index 88e220b..bbff0b3 100644 --- a/polymerist/tests/mdtools/__init__.py +++ b/polymerist/tests/mdtools/__init__.py @@ -1 +1,4 @@ -'''Unit 
tests for `mdtools` package''' \ No newline at end of file +'''Unit tests for `mdtools` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/lammpstools/__init__.py b/polymerist/tests/mdtools/lammpstools/__init__.py index f105c90..5bc51d1 100644 --- a/polymerist/tests/mdtools/lammpstools/__init__.py +++ b/polymerist/tests/mdtools/lammpstools/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `lammpstools` package''' \ No newline at end of file +'''Unit tests for `lammpstools` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openfftools/__init__.py b/polymerist/tests/mdtools/openfftools/__init__.py index d2e4e9d..aea6512 100644 --- a/polymerist/tests/mdtools/openfftools/__init__.py +++ b/polymerist/tests/mdtools/openfftools/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `openfftools` package''' \ No newline at end of file +'''Unit tests for `openfftools` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/__init__.py b/polymerist/tests/mdtools/openfftools/partialcharge/__init__.py index 837792d..a5f8428 100644 --- a/polymerist/tests/mdtools/openfftools/partialcharge/__init__.py +++ b/polymerist/tests/mdtools/openfftools/partialcharge/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `partialcharge` package''' \ No newline at end of file +'''Unit tests for `partialcharge` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/rescharge/__init__.py b/polymerist/tests/mdtools/openfftools/partialcharge/rescharge/__init__.py index af9bb9f..fb4ba94 100644 --- a/polymerist/tests/mdtools/openfftools/partialcharge/rescharge/__init__.py +++ b/polymerist/tests/mdtools/openfftools/partialcharge/rescharge/__init__.py @@ -1 +1,4 @@ -'''Unit tests for 
`rescharge` package''' \ No newline at end of file +'''Unit tests for `rescharge` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openfftools/solvation/__init__.py b/polymerist/tests/mdtools/openfftools/solvation/__init__.py index 6f5bcbc..58874b4 100644 --- a/polymerist/tests/mdtools/openfftools/solvation/__init__.py +++ b/polymerist/tests/mdtools/openfftools/solvation/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `solvation` package''' \ No newline at end of file +'''Unit tests for `solvation` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openfftools/solvation/solvents/__init__.py b/polymerist/tests/mdtools/openfftools/solvation/solvents/__init__.py index e7e7c7b..acf5d8b 100644 --- a/polymerist/tests/mdtools/openfftools/solvation/solvents/__init__.py +++ b/polymerist/tests/mdtools/openfftools/solvation/solvents/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `solvents` package''' \ No newline at end of file +'''Unit tests for `solvents` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/mdtools/openmmtools/__init__.py b/polymerist/tests/mdtools/openmmtools/__init__.py index 44b2ca5..7b9a753 100644 --- a/polymerist/tests/mdtools/openmmtools/__init__.py +++ b/polymerist/tests/mdtools/openmmtools/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `openmmtools` package''' \ No newline at end of file +'''Unit tests for `openmmtools` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/polymers/__init__.py b/polymerist/tests/polymers/__init__.py index 7622def..f37a37e 100644 --- a/polymerist/tests/polymers/__init__.py +++ b/polymerist/tests/polymers/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `polymers` package''' \ No newline at end of file +'''Unit tests for `polymers` package''' + +__author__ 
= 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/polymers/monomers/__init__.py b/polymerist/tests/polymers/monomers/__init__.py index c8f2d69..9a1f876 100644 --- a/polymerist/tests/polymers/monomers/__init__.py +++ b/polymerist/tests/polymers/monomers/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `monomers` package''' \ No newline at end of file +'''Unit tests for `monomers` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/polymers/smidgelib/__init__.py b/polymerist/tests/polymers/smidgelib/__init__.py index a66a8af..957b84a 100644 --- a/polymerist/tests/polymers/smidgelib/__init__.py +++ b/polymerist/tests/polymers/smidgelib/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `smidgelib` package''' \ No newline at end of file +'''Unit tests for `smidgelib` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/rdutils/__init__.py b/polymerist/tests/rdutils/__init__.py index c518ab5..430bb51 100644 --- a/polymerist/tests/rdutils/__init__.py +++ b/polymerist/tests/rdutils/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `rdutils` package''' \ No newline at end of file +'''Unit tests for `rdutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/rdutils/bonding/__init__.py b/polymerist/tests/rdutils/bonding/__init__.py index 8ce1af8..9913aa0 100644 --- a/polymerist/tests/rdutils/bonding/__init__.py +++ b/polymerist/tests/rdutils/bonding/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `bonding` package''' \ No newline at end of file +'''Unit tests for `bonding` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/rdutils/labeling/__init__.py b/polymerist/tests/rdutils/labeling/__init__.py index 049964f..a8b557a 100644 --- a/polymerist/tests/rdutils/labeling/__init__.py +++ 
b/polymerist/tests/rdutils/labeling/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `labeling` package''' \ No newline at end of file +'''Unit tests for `labeling` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/rdutils/rdcoords/__init__.py b/polymerist/tests/rdutils/rdcoords/__init__.py index 9adead5..31c33fa 100644 --- a/polymerist/tests/rdutils/rdcoords/__init__.py +++ b/polymerist/tests/rdutils/rdcoords/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `rdcoords` package''' \ No newline at end of file +'''Unit tests for `rdcoords` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/rdutils/reactions/__init__.py b/polymerist/tests/rdutils/reactions/__init__.py index 413d674..311bb04 100644 --- a/polymerist/tests/rdutils/reactions/__init__.py +++ b/polymerist/tests/rdutils/reactions/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `reactions` package''' \ No newline at end of file +'''Unit tests for `reactions` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/smileslib/__init__.py b/polymerist/tests/smileslib/__init__.py index b9c1d21..56d4f5a 100644 --- a/polymerist/tests/smileslib/__init__.py +++ b/polymerist/tests/smileslib/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `smileslib` package''' \ No newline at end of file +'''Unit tests for `smileslib` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/smileslib/functgroups/__init__.py b/polymerist/tests/smileslib/functgroups/__init__.py index 2d5efe7..ff3dd30 100644 --- a/polymerist/tests/smileslib/functgroups/__init__.py +++ b/polymerist/tests/smileslib/functgroups/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `functgroups` package''' \ No newline at end of file +'''Unit tests for `functgroups` package''' + +__author__ = 'Timotej Bernat' +__email__ = 
'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/test_polymerist.py b/polymerist/tests/test_polymerist.py index 4d1c730..0076e96 100644 --- a/polymerist/tests/test_polymerist.py +++ b/polymerist/tests/test_polymerist.py @@ -2,6 +2,9 @@ Unit and regression test for the polymerist package. """ +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + # Import package, test suite, and other packages as needed import sys diff --git a/polymerist/tests/unitutils/__init__.py b/polymerist/tests/unitutils/__init__.py index a222fa9..8f0ff50 100644 --- a/polymerist/tests/unitutils/__init__.py +++ b/polymerist/tests/unitutils/__init__.py @@ -1 +1,4 @@ -'''Unit tests for `unitutils` package''' \ No newline at end of file +'''Unit tests for `unitutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/unitutils/__init__.py b/polymerist/unitutils/__init__.py index 831307c..344bdb9 100644 --- a/polymerist/unitutils/__init__.py +++ b/polymerist/unitutils/__init__.py @@ -1 +1,4 @@ -'''Physical constants, dimensional analysis, and unit conversion utilities''' \ No newline at end of file +'''Physical constants, dimensional analysis, and unit conversion utilities''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/unitutils/dimensions.py b/polymerist/unitutils/dimensions.py index e4ceba6..15acd2a 100644 --- a/polymerist/unitutils/dimensions.py +++ b/polymerist/unitutils/dimensions.py @@ -1,5 +1,8 @@ '''For checking dimensionality and presence of units''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Any, Union from pint import Quantity as PintQuantity # this is also the base class for all OpenFF-style units diff --git a/polymerist/unitutils/extraunits.py b/polymerist/unitutils/extraunits.py index 3c01f8e..f425ecf 100644 --- a/polymerist/unitutils/extraunits.py +++ 
b/polymerist/unitutils/extraunits.py @@ -1,5 +1,8 @@ '''Defining units which, for one reason or another, are not defined in Pint or OpenMM units''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + import sys _MODULE = sys.modules[__name__] diff --git a/polymerist/unitutils/interop.py b/polymerist/unitutils/interop.py index 710815a..b36b7d3 100644 --- a/polymerist/unitutils/interop.py +++ b/polymerist/unitutils/interop.py @@ -1,5 +1,8 @@ '''Decorators for handling interconversion between the OpenMM and OpenFF (Pint) unit engines''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Callable, TypeVar R = TypeVar('R') # for representing generic return values Q = TypeVar('Q') # for representing generic Quantity-like objects diff --git a/polymerist/unitutils/physconsts.py b/polymerist/unitutils/physconsts.py index 64cd27e..fa3eca5 100644 --- a/polymerist/unitutils/physconsts.py +++ b/polymerist/unitutils/physconsts.py @@ -1,5 +1,8 @@ '''Unit-aware compendium of useful physical constants''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from scipy.constants import physical_constants from .unitstr import unit_from_unit_str diff --git a/polymerist/unitutils/unitstr.py b/polymerist/unitutils/unitstr.py index a24a18d..7e3bf54 100644 --- a/polymerist/unitutils/unitstr.py +++ b/polymerist/unitutils/unitstr.py @@ -1,5 +1,8 @@ '''Utilities for looking up and producing Units from strings''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional from openmm import unit as openmm_unit from openmm.unit import Unit, dimensionless From a7e34d61f376b35b472812a0fc121ea12e4b0eb7 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 19 Nov 2024 16:22:02 -0700 Subject: [PATCH 074/191] Added sphinx requirement, deprecated chemspipy dependency --- devtools/conda-envs/release-build.yml | 4 ++-- devtools/conda-envs/test-env.yml | 5 +++-- 
polymerist/smileslib/chemdbqueries.py | 4 +--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/devtools/conda-envs/release-build.yml b/devtools/conda-envs/release-build.yml index 9caedc1..00e4b7a 100644 --- a/devtools/conda-envs/release-build.yml +++ b/devtools/conda-envs/release-build.yml @@ -8,10 +8,11 @@ dependencies: - pip - jupyterlab - # Testing + # Testing and docs - pytest - pytest-cov - codecov + - sphinx # Numerical libraries - numpy @@ -46,6 +47,5 @@ dependencies: # Chemical database queries - cirpy - pubchempy - - chemspipy - pip: - espaloma-charge ==0.0.8 \ No newline at end of file diff --git a/devtools/conda-envs/test-env.yml b/devtools/conda-envs/test-env.yml index 031b1fc..447671e 100644 --- a/devtools/conda-envs/test-env.yml +++ b/devtools/conda-envs/test-env.yml @@ -8,10 +8,11 @@ dependencies: - pip - jupyterlab - # Testing + # Testing and docs - pytest - pytest-cov - codecov + - sphinx # Numerical libraries - numpy @@ -27,6 +28,7 @@ dependencies: # Molecule building - mbuild + - openbabel - rdkit - openeye-toolkits # TODO: consider making this optional? 
@@ -45,6 +47,5 @@ dependencies: # Chemical database queries - cirpy - pubchempy - - chemspipy - pip: - espaloma-charge ==0.0.8 \ No newline at end of file diff --git a/polymerist/smileslib/chemdbqueries.py b/polymerist/smileslib/chemdbqueries.py index 5b3c480..936f331 100644 --- a/polymerist/smileslib/chemdbqueries.py +++ b/polymerist/smileslib/chemdbqueries.py @@ -29,6 +29,4 @@ def get_property_from_smiles(smiles : str, prop_name : str='iupac_name') -> Opti if pc_prop_name in prop_query: return prop_query[pc_prop_name] else: - return None - - # TODO : add ChemSpider once I can obtain an API key (https://chemspipy.readthedocs.io/en/latest/guide/intro.html#apikey) \ No newline at end of file + return None \ No newline at end of file From 2a19f181fb52725ec221a040fb0ba7d6a41baaeb Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 16:17:40 -0700 Subject: [PATCH 075/191] Added importutils submodule for dynamically checking module dependencies --- .../genutils/importutils/dependencies.py | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 polymerist/genutils/importutils/dependencies.py diff --git a/polymerist/genutils/importutils/dependencies.py b/polymerist/genutils/importutils/dependencies.py new file mode 100644 index 0000000..26d14b1 --- /dev/null +++ b/polymerist/genutils/importutils/dependencies.py @@ -0,0 +1,81 @@ +'''Utilities for checking and enforcing module dependencies within code''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from typing import Callable, ParamSpec, TypeVar + +Params = ParamSpec('Params') +ReturnType = TypeVar('ReturnType') +TCall = Callable[Params, ReturnType] # generic function of callable class + +import importlib +from functools import wraps + + +def module_installed(module_name : str) -> bool: + ''' + Check whether a module of the given name is present on the system + + Parameters + ---------- + module_name : str + The name of the module, as it would occur in 
an import statement + Do not support direct passing of module objects to avoid circularity + (i.e. no reason to check if a module is present if one has already imported it elsewhere) + + Returns + ------- + module_found : bool + Whether or not the module was found to be installed in the current working environment + ''' + try: + package = importlib.import_module(module_name) + except ModuleNotFoundError: + return False + else: + return True + +def modules_installed(*module_names : list[str]) -> bool: + ''' + Check whether one or more modules are all present + Will only return true if ALL specified modules are found + + Parameters + ---------- + module_names : *str + Any number of module names, passed as a comma-separated sequence of strings + + Returns + ------- + all_modules_found : bool + Whether or not all modules were found to be installed in the current working environment + ''' + return all(module_installed(module_name) for module_name in module_names) + +def requires_modules(*required_module_names : list[str]) -> Callable[[TCall[..., ReturnType]], TCall[..., ReturnType]]: + ''' + Decorator which enforces optional module dependencies prior to function execution + + Parameters + ---------- + module_names : *str + Any number of module names, passed as a comma-separated sequence of strings + + Raises + ------ + ImportError : Exception + Raised if any of the specified packages is not found to be installed + Exception message will indicate the name of the specific package found missing + ''' + def decorator(func) -> TCall[..., ReturnType]: + @wraps(func) + def req_wrapper(*args : Params.args, **kwargs : Params.kwargs) -> ReturnType: + for module_name in required_module_names: + if not module_installed(module_name): + raise ImportError(f'No installation found for module "{module_name}"') + else: + return func(*args, **kwargs) + + return req_wrapper + return decorator \ No newline at end of file From 129a7b158c3a52084ebec16bfe32634f7e36f69c Mon Sep 17 00:00:00 2001 
From: Timotej Bernat Date: Wed, 20 Nov 2024 17:13:46 -0700 Subject: [PATCH 076/191] Moved importutils tests into dedicated tests.gentutils.importutils subdirectory --- polymerist/tests/genutils/importutils/__init__.py | 4 ++++ .../tests/genutils/{ => importutils}/test_pkginspect.py | 0 2 files changed, 4 insertions(+) create mode 100644 polymerist/tests/genutils/importutils/__init__.py rename polymerist/tests/genutils/{ => importutils}/test_pkginspect.py (100%) diff --git a/polymerist/tests/genutils/importutils/__init__.py b/polymerist/tests/genutils/importutils/__init__.py new file mode 100644 index 0000000..c08e2b6 --- /dev/null +++ b/polymerist/tests/genutils/importutils/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `importutils` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/test_pkginspect.py b/polymerist/tests/genutils/importutils/test_pkginspect.py similarity index 100% rename from polymerist/tests/genutils/test_pkginspect.py rename to polymerist/tests/genutils/importutils/test_pkginspect.py From d077bd406629c24e113d3cdbd5c3fe97b39e740d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 17:41:37 -0700 Subject: [PATCH 077/191] Reimplemented find_module() with importlib.util.find_spec() to avoid actual import during check --- .../genutils/importutils/dependencies.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/polymerist/genutils/importutils/dependencies.py b/polymerist/genutils/importutils/dependencies.py index 26d14b1..747b569 100644 --- a/polymerist/genutils/importutils/dependencies.py +++ b/polymerist/genutils/importutils/dependencies.py @@ -9,7 +9,8 @@ ReturnType = TypeVar('ReturnType') TCall = Callable[Params, ReturnType] # generic function of callable class -import importlib +# from importlib import import_module +from importlib.util import find_spec from functools import wraps @@ -29,12 +30,17 @@ def 
module_installed(module_name : str) -> bool: module_found : bool Whether or not the module was found to be installed in the current working environment ''' - try: - package = importlib.import_module(module_name) - except ModuleNotFoundError: + # try: + # package = import_module(module_name) + # except ModuleNotFoundError: + # return False + # else: + # return True + + try: # NOTE: opted for this implementation, as it never actually imports the package in question (faster and fewer side-effects) + return find_spec(module_name) is not None + except (ValueError, AttributeError, ModuleNotFoundError): # these could all be raised by return False - else: - return True def modules_installed(*module_names : list[str]) -> bool: ''' From ff82901b4e558a364f0a5cca55f88be11b3bff99 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 17:41:55 -0700 Subject: [PATCH 078/191] Wrte unit tests for importutils.dependencies --- .../genutils/importutils/test_dependencies.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 polymerist/tests/genutils/importutils/test_dependencies.py diff --git a/polymerist/tests/genutils/importutils/test_dependencies.py b/polymerist/tests/genutils/importutils/test_dependencies.py new file mode 100644 index 0000000..34395b9 --- /dev/null +++ b/polymerist/tests/genutils/importutils/test_dependencies.py @@ -0,0 +1,46 @@ +'''Unit tests for dependency checking utilities''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest + +from typing import Any, Callable +from polymerist.genutils.importutils import dependencies + + +# Testing module finding +@pytest.mark.parametrize( + 'module_names, expected_found', [ + (['polymerist'], True), # we'd better hope the parent module is present if we're running tests on it :P + (['sys'], True), # test stdlib packages which ought to be present if Python is + (['os', 'sys'], True), # test that unpacking also works + (['fake--module'], False), # 
test an obviously fake module name (don't want to try an actual module in case it becomes an dependency someday) + ([42], False), # test something that isn't even a module to check error handling + ] +) +def test_modules_installed(module_names : list[str], expected_found : bool) -> None: + '''Check that module install checker correctly identifies present and absent modules''' + assert dependencies.modules_installed(*module_names) == expected_found + +# Testing requires_modules decorator +@dependencies.requires_modules('os') +def should_pass() -> str: + '''Dummy function to test requires_modules decorator for dependencies that are present''' + return 'I will run!' + +@dependencies.requires_modules('fake--module') +def should_fail() -> str: + '''Dummy function to test requires_modules decorator for dependencies that are present''' + return 'I will xfail :(' + +@pytest.mark.parametrize( + 'func', + [ + should_pass, + pytest.param(should_fail, marks=pytest.mark.xfail(raises=ImportError, reason='The required module shouldn\'t be found in the environment', strict=True)), + ] +) +def test_requires_modules(func : Callable[..., Any]) -> None: + '''Test that the requires_modules decortor correctly wraps functions''' + _ = func() # no assertion needed, xfail cases should raise Exception while working cases will ternimate without Exception \ No newline at end of file From c7ac3f7fe48bc6ad18bbff9db4e94959485b0536 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 18:09:22 -0700 Subject: [PATCH 079/191] Standardized docstrings --- polymerist/tests/genutils/importutils/test_dependencies.py | 2 +- polymerist/tests/genutils/importutils/test_pkginspect.py | 2 +- polymerist/tests/genutils/test_attrs.py | 2 +- polymerist/tests/genutils/trees/test_trees.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/polymerist/tests/genutils/importutils/test_dependencies.py b/polymerist/tests/genutils/importutils/test_dependencies.py index 34395b9..eea4ee1 
100644 --- a/polymerist/tests/genutils/importutils/test_dependencies.py +++ b/polymerist/tests/genutils/importutils/test_dependencies.py @@ -1,4 +1,4 @@ -'''Unit tests for dependency checking utilities''' +'''Unit tests for `dependencies` package''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/importutils/test_pkginspect.py b/polymerist/tests/genutils/importutils/test_pkginspect.py index ea07262..81627bf 100644 --- a/polymerist/tests/genutils/importutils/test_pkginspect.py +++ b/polymerist/tests/genutils/importutils/test_pkginspect.py @@ -1,4 +1,4 @@ -'''Unit tests for package inspection utilities''' +'''Unit tests for `pkginspect` package`''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/test_attrs.py b/polymerist/tests/genutils/test_attrs.py index 5f1ac8f..87f84c3 100644 --- a/polymerist/tests/genutils/test_attrs.py +++ b/polymerist/tests/genutils/test_attrs.py @@ -1,4 +1,4 @@ -'''Unit test for attribute inspection''' +'''Unit tests for `attrs` package''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/genutils/trees/test_trees.py b/polymerist/tests/genutils/trees/test_trees.py index 44e408b..73755a9 100644 --- a/polymerist/tests/genutils/trees/test_trees.py +++ b/polymerist/tests/genutils/trees/test_trees.py @@ -1,4 +1,4 @@ -'''Unit tests for trees''' +'''Unit tests for tree-related functionality''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' From e14afd73c714bf602cc48d76eccd4cd6fc05a3ef Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 18:09:52 -0700 Subject: [PATCH 080/191] Added unit tests module for mdutils.openfftools.partialcharge.molchargers --- .../openfftools/partialcharge/test_molchargers.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 
polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py new file mode 100644 index 0000000..05795b4 --- /dev/null +++ b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py @@ -0,0 +1,13 @@ +'''Unit tests for `molchargers` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest + +from polymerist.mdtools.openfftools.partialcharge import molchargers + + +def test_CHARGING_METHOD_class_attr() -> None: + '''Test that the MolCharger abstract class attribute "CHARGING_METHOD" is properly registere and enforced''' + pass From 1a1b7020eec40dbcb39e819f1b9bafa533037fa9 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 18:59:13 -0700 Subject: [PATCH 081/191] Filled out unit tests for molchargers --- .../partialcharge/test_molchargers.py | 52 +++++++++++++++++-- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py index 05795b4..60924be 100644 --- a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py +++ b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py @@ -5,9 +5,53 @@ import pytest -from polymerist.mdtools.openfftools.partialcharge import molchargers +from openff.toolkit import Molecule +from polymerist.polymers.monomers.specification import expanded_SMILES +from polymerist.mdtools.openfftools.partialcharge.molchargers import MolCharger +from polymerist.mdtools.openfftools.partialcharge.rescharge.interface import LibraryCharger -def test_CHARGING_METHOD_class_attr() -> None: - '''Test that the MolCharger abstract class attribute "CHARGING_METHOD" is properly registere and enforced''' - pass + +# Test MolCharger subclass 
registration +def test_molcharger_registers_subclasses() -> None: + '''Test that the MolCharger tracks subclasses''' + assert hasattr(MolCharger, 'subclass_registry') + +@pytest.mark.parametrize('expected_charge_method_name, molcharger_subclass', MolCharger.subclass_registry.items()) # NOTE: this will fail if test_molcharger_registers_subclasses() fails +def test_molcharger_subclass_attr_registration(molcharger_subclass : type[MolCharger], expected_charge_method_name : str) -> None: + '''Test that all MolCharger subclasses define and are registered under their "CHARGING_METHOD" class property''' + assert hasattr(molcharger_subclass, 'CHARGING_METHOD') and (getattr(molcharger_subclass, 'CHARGING_METHOD') == expected_charge_method_name) + + +# Test MolCharger subclass implementations +@pytest.fixture +def offmol() -> Molecule: + '''Dummy Molecule object for testing''' + # DEV: worthing double-checking that partial charges are initially empty?? + return Molecule.from_smiles('c1ccccc1C(=O)O') # benzoic acid - nice and small, but with some non-trivial structure + +MOLCHARGER_TYPES_TO_TEST = [ + molcharger_subclass + for molcharger_subclass in MolCharger.subclass_registry.values() # LibraryCharger behave differently to other MolCharger and are kind of a pain in the ass generally... 
+ if molcharger_subclass != LibraryCharger # ...intend to deprecate and revamp them eventually, so will just exclude them from testing for now +] + +@pytest.mark.parametrize('molcharger_subclass', MOLCHARGER_TYPES_TO_TEST) +def test_molchargers_assign_charges(offmol : Molecule, molcharger_subclass : type[MolCharger]) -> None: + charger = molcharger_subclass() + cmol = charger.charge_molecule(offmol) + assert cmol.partial_charges is not None # should assign charges to the new, copied molecule + +@pytest.mark.parametrize('molcharger_subclass', MOLCHARGER_TYPES_TO_TEST) +def test_molchargers_act_readonly(offmol : Molecule, molcharger_subclass : type[MolCharger]) -> None: + charger = molcharger_subclass() + cmol = charger.charge_molecule(offmol) + assert offmol.partial_charges is None # should NOT affect the + +@pytest.mark.parametrize('molcharger_subclass', MOLCHARGER_TYPES_TO_TEST) +def test_molchargers_record_charge_method(offmol : Molecule, molcharger_subclass : type[MolCharger]) -> None: + charger = molcharger_subclass() + cmol = charger.charge_molecule(offmol) + + recorded_charge_method = cmol.properties.get('charge_method', None) + assert (recorded_charge_method is not None) and (recorded_charge_method == getattr(molcharger_subclass, 'CHARGING_METHOD')) From 84964f16d39cf6a5cd562eb0395946f3d22e87bf Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 18:59:27 -0700 Subject: [PATCH 082/191] Ignored GNN charge model ".model.pt" junk files --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 64b07fd..f4f79d9 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,6 @@ ENV/ # In-tree generated files */_version.py + +# Espaloma junk output +**/.model.pt \ No newline at end of file From 121e61cc33b1a3d1816b563157ec54cf2c1a69d2 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 19:12:22 -0700 Subject: [PATCH 083/191] Added extra arg to specify custom Exception type raised for missing 
dependencies --- polymerist/genutils/importutils/dependencies.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/importutils/dependencies.py b/polymerist/genutils/importutils/dependencies.py index 747b569..4787afe 100644 --- a/polymerist/genutils/importutils/dependencies.py +++ b/polymerist/genutils/importutils/dependencies.py @@ -59,7 +59,10 @@ def modules_installed(*module_names : list[str]) -> bool: ''' return all(module_installed(module_name) for module_name in module_names) -def requires_modules(*required_module_names : list[str]) -> Callable[[TCall[..., ReturnType]], TCall[..., ReturnType]]: +def requires_modules( + *required_module_names : list[str], + missing_module_error : type[Exception]=ImportError, + ) -> Callable[[TCall[..., ReturnType]], TCall[..., ReturnType]]: ''' Decorator which enforces optional module dependencies prior to function execution @@ -67,6 +70,9 @@ def requires_modules(*required_module_names : list[str]) -> Callable[[TCall[..., ---------- module_names : *str Any number of module names, passed as a comma-separated sequence of strings + missing_module_error : type[Exception], default ImportError + The type of Exception to raise if a module is not found installed + Defaults to ImportError Raises ------ @@ -79,7 +85,7 @@ def decorator(func) -> TCall[..., ReturnType]: def req_wrapper(*args : Params.args, **kwargs : Params.kwargs) -> ReturnType: for module_name in required_module_names: if not module_installed(module_name): - raise ImportError(f'No installation found for module "{module_name}"') + raise missing_module_error(f'No installation found for module "{module_name}"') else: return func(*args, **kwargs) From e13ffe21a7407f9e8e993977419075136afbe3cb Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 19:13:32 -0700 Subject: [PATCH 084/191] Updated MolCharger "CHARGING_METHOD" class attribute to be handled by decorators.classmod.register_abstract_class_attrs() --- 
.../openfftools/partialcharge/molchargers.py | 21 +++++-------------- .../partialcharge/rescharge/interface.py | 4 +--- 2 files changed, 6 insertions(+), 19 deletions(-) diff --git a/polymerist/mdtools/openfftools/partialcharge/molchargers.py b/polymerist/mdtools/openfftools/partialcharge/molchargers.py index cb8d68e..72082f3 100644 --- a/polymerist/mdtools/openfftools/partialcharge/molchargers.py +++ b/polymerist/mdtools/openfftools/partialcharge/molchargers.py @@ -16,7 +16,7 @@ from .. import TKREGS, _OE_TKWRAPPER_IS_AVAILABLE, OEUnavailableException from .chargemethods import NAGL_MODEL -from ....genutils.decorators.classmod import register_subclasses +from ....genutils.decorators.classmod import register_subclasses, register_abstract_class_attrs from ....genutils.decorators.functional import optional_in_place @@ -32,14 +32,9 @@ def has_partial_charges(mol : Union[Molecule, Chem.Mol]) -> bool: # ABSTRACT AND CONCRETE CLASSES FOR CHARGING MOLECULES @register_subclasses(key_attr='CHARGING_METHOD') +@register_abstract_class_attrs('CHARGING_METHOD') class MolCharger(ABC): '''Base interface for defining various methods of generating and storing atomic partial charges''' - @abstractproperty - @classmethod - def CHARGING_METHOD(cls): - '''For setting the name of the method as a class attribute in child classes''' - pass - @abstractmethod @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: @@ -55,28 +50,22 @@ def charge_molecule(self, uncharged_mol : Molecule) -> None: LOGGER.info(f'Successfully assigned "{self.CHARGING_METHOD}" charges') # CONCRETE IMPLEMENTATIONS OF DIFFERENT CHARGING METHODS -class ABE10Charger(MolCharger): +class ABE10Charger(MolCharger, CHARGING_METHOD= 'AM1-BCC-ELF10'): '''Charger class for AM1-BCC-ELF10 exact charging''' - CHARGING_METHOD : ClassVar[str] = 'AM1-BCC-ELF10' - @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: if not _OE_TKWRAPPER_IS_AVAILABLE: raise OEUnavailableException # 
AM1-BCC-ELF10 is exclusively available thru OpenEye; if it is not present, then, must err uncharged_mol.assign_partial_charges(partial_charge_method='am1bccelf10', toolkit_registry=TKREGS['OpenEye Toolkit']) # TODO : provide support for AMBER / RDKit if OE license is unavailable -class EspalomaCharger(MolCharger): +class EspalomaCharger(MolCharger, CHARGING_METHOD='Espaloma-AM1-BCC'): '''Charger class for EspalomaCharge charging''' - CHARGING_METHOD : ClassVar[str] = 'Espaloma-AM1-BCC' - @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: uncharged_mol.assign_partial_charges(partial_charge_method='espaloma-am1bcc', toolkit_registry=TKREGS['Espaloma Charge Toolkit']) -class NAGLCharger(MolCharger): +class NAGLCharger(MolCharger, CHARGING_METHOD='NAGL'): '''Charger class for NAGL charging''' - CHARGING_METHOD : ClassVar[str] = 'NAGL' - @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: nagl_charges = NAGL_MODEL.compute_property(uncharged_mol, check_domains=True, error_if_unsupported=True) diff --git a/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py b/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py index dd168e7..2f2810a 100644 --- a/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py +++ b/polymerist/mdtools/openfftools/partialcharge/rescharge/interface.py @@ -3,7 +3,6 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import ClassVar from dataclasses import dataclass from openff.toolkit import Molecule @@ -15,10 +14,9 @@ @dataclass -class LibraryCharger(MolCharger): +class LibraryCharger(MolCharger, CHARGING_METHOD='RCT'): '''Charger class for applying library charges onto residue-mapped Molecules''' charges_by_residue : ChargesByResidue - CHARGING_METHOD : ClassVar[str] = 'RCT' @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: From eba751b8b064513ce81a2bdc314bc6c8170781e9 Mon Sep 17 
00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 20:05:09 -0700 Subject: [PATCH 085/191] Made MolCharger subclasses aware of package dependences, directly make use of respective ToolkitWrappers --- .../openfftools/partialcharge/molchargers.py | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/polymerist/mdtools/openfftools/partialcharge/molchargers.py b/polymerist/mdtools/openfftools/partialcharge/molchargers.py index 72082f3..73f1d98 100644 --- a/polymerist/mdtools/openfftools/partialcharge/molchargers.py +++ b/polymerist/mdtools/openfftools/partialcharge/molchargers.py @@ -6,18 +6,15 @@ import logging LOGGER = logging.getLogger(__name__) -from typing import Any, ClassVar, Union -from abc import ABC, abstractmethod, abstractproperty +from typing import Union +from abc import ABC, abstractmethod from rdkit import Chem -from openff.units import unit as offunit from openff.toolkit.topology.molecule import Molecule -from openff.toolkit.utils.exceptions import ToolkitUnavailableException # TODO : use chargemethods.TOOLKITS_BY_CHARGE_METHOD to automatically determine whether/which toolkits are available for each method -from .. 
import TKREGS, _OE_TKWRAPPER_IS_AVAILABLE, OEUnavailableException -from .chargemethods import NAGL_MODEL -from ....genutils.decorators.classmod import register_subclasses, register_abstract_class_attrs +from ....genutils.importutils.dependencies import requires_modules from ....genutils.decorators.functional import optional_in_place +from ....genutils.decorators.classmod import register_subclasses, register_abstract_class_attrs def has_partial_charges(mol : Union[Molecule, Chem.Mol]) -> bool: @@ -52,21 +49,36 @@ def charge_molecule(self, uncharged_mol : Molecule) -> None: # CONCRETE IMPLEMENTATIONS OF DIFFERENT CHARGING METHODS class ABE10Charger(MolCharger, CHARGING_METHOD= 'AM1-BCC-ELF10'): '''Charger class for AM1-BCC-ELF10 exact charging''' + @requires_modules('openeye.oechem', 'openeye.oeomega') # NOTE: just checking "openeye" doesn't work, as for whatever weird reason the toplevel openeye package has no module spec @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: - if not _OE_TKWRAPPER_IS_AVAILABLE: - raise OEUnavailableException # AM1-BCC-ELF10 is exclusively available thru OpenEye; if it is not present, then, must err - uncharged_mol.assign_partial_charges(partial_charge_method='am1bccelf10', toolkit_registry=TKREGS['OpenEye Toolkit']) # TODO : provide support for AMBER / RDKit if OE license is unavailable + from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper + + uncharged_mol.assign_partial_charges( + partial_charge_method='am1bccelf10', + toolkit_registry=OpenEyeToolkitWrapper(), # instance init will raise exception if license or OpenEye packages are missing + ) # TODO : find decent alternative if OpenEye license is missing (AmberTools doesn't do ELF10 and doesn't work on Windows) class EspalomaCharger(MolCharger, CHARGING_METHOD='Espaloma-AM1-BCC'): '''Charger class for EspalomaCharge charging''' + @requires_modules('espaloma_charge') @optional_in_place def _charge_molecule(self, uncharged_mol : 
Molecule) -> None: - uncharged_mol.assign_partial_charges(partial_charge_method='espaloma-am1bcc', toolkit_registry=TKREGS['Espaloma Charge Toolkit']) + from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper + + uncharged_mol.assign_partial_charges( + partial_charge_method='espaloma-am1bcc', # NOTE: this is actually the ONLY charge method the EspalomaChargeToolkitWrapper supports + toolkit_registry=EspalomaChargeToolkitWrapper(), + ) class NAGLCharger(MolCharger, CHARGING_METHOD='NAGL'): '''Charger class for NAGL charging''' + @requires_modules('openff.nagl') @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: - nagl_charges = NAGL_MODEL.compute_property(uncharged_mol, check_domains=True, error_if_unsupported=True) - uncharged_mol.partial_charges = nagl_charges * offunit.elementary_charge # need to have OpenFF-style units attached to set "partial_charges" property + from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper + + uncharged_mol.assign_partial_charges( + partial_charge_method='openff-gnn-am1bcc-0.1.0-rc.3.pt', # 'openff-gnn-am1bcc-0.1.0-rc.2.pt', + toolkit_registry=NAGLToolkitWrapper(), + ) From 1cc27430c547cbf44f96104766022ce80d04a6e8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 20:05:41 -0700 Subject: [PATCH 086/191] Updated and pinned NAGL tooling and model versions in env --- devtools/conda-envs/release-build.yml | 3 ++- devtools/conda-envs/test-env.yml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/devtools/conda-envs/release-build.yml b/devtools/conda-envs/release-build.yml index 00e4b7a..2038fa9 100644 --- a/devtools/conda-envs/release-build.yml +++ b/devtools/conda-envs/release-build.yml @@ -42,7 +42,8 @@ dependencies: # OpenFF stack - openff-toolkit ~=0.16 - openff-interchange >=0.3.28 - - openff-nagl + - openff-nagl >= 0.4 + - openff-nagl-models >= 0.3 # Chemical database queries - cirpy diff --git a/devtools/conda-envs/test-env.yml 
b/devtools/conda-envs/test-env.yml index 447671e..b5f48f2 100644 --- a/devtools/conda-envs/test-env.yml +++ b/devtools/conda-envs/test-env.yml @@ -42,7 +42,8 @@ dependencies: # OpenFF stack - openff-toolkit ~=0.16 - openff-interchange >=0.3.28 - - openff-nagl + - openff-nagl >= 0.4 + - openff-nagl-models >= 0.3 # Chemical database queries - cirpy From b27c4abc069ba3e67e0ab2a6a76e1580b7aa38dc Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 20:37:46 -0700 Subject: [PATCH 087/191] Strengthened openff.nagl package dependencies --- polymerist/mdtools/openfftools/partialcharge/molchargers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/mdtools/openfftools/partialcharge/molchargers.py b/polymerist/mdtools/openfftools/partialcharge/molchargers.py index 73f1d98..29bca2f 100644 --- a/polymerist/mdtools/openfftools/partialcharge/molchargers.py +++ b/polymerist/mdtools/openfftools/partialcharge/molchargers.py @@ -73,7 +73,7 @@ def _charge_molecule(self, uncharged_mol : Molecule) -> None: class NAGLCharger(MolCharger, CHARGING_METHOD='NAGL'): '''Charger class for NAGL charging''' - @requires_modules('openff.nagl') + @requires_modules('openff.nagl', 'openff.nagl_models') @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper From 458a841a96f0e34bb06655139c8df8e26e702759 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 20:38:18 -0700 Subject: [PATCH 088/191] Mae unit tests aware of installed packages (avoid erroneous errors caused by dependencies, rather than code) --- .../partialcharge/test_molchargers.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py index 60924be..3b43d78 100644 --- 
a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py +++ b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py @@ -6,9 +6,10 @@ import pytest from openff.toolkit import Molecule +from openff.toolkit.utils.toolkits import OPENEYE_AVAILABLE -from polymerist.polymers.monomers.specification import expanded_SMILES -from polymerist.mdtools.openfftools.partialcharge.molchargers import MolCharger +from polymerist.genutils.importutils.dependencies import modules_installed +from polymerist.mdtools.openfftools.partialcharge.molchargers import MolCharger, ABE10Charger, EspalomaCharger, NAGLCharger from polymerist.mdtools.openfftools.partialcharge.rescharge.interface import LibraryCharger @@ -30,11 +31,17 @@ def offmol() -> Molecule: # DEV: worthing double-checking that partial charges are initially empty?? return Molecule.from_smiles('c1ccccc1C(=O)O') # benzoic acid - nice and small, but with some non-trivial structure -MOLCHARGER_TYPES_TO_TEST = [ - molcharger_subclass - for molcharger_subclass in MolCharger.subclass_registry.values() # LibraryCharger behave differently to other MolCharger and are kind of a pain in the ass generally... 
- if molcharger_subclass != LibraryCharger # ...intend to deprecate and revamp them eventually, so will just exclude them from testing for now -] +## selectively register test to avoid failures due to missing optional dependencies +MOLCHARGER_TYPES_TO_TEST : list[type[MolCharger]] = [] +if modules_installed('openeye.oechem', 'openeye.oeomega') and OPENEYE_AVAILABLE: # extra check needed block check when missing license (as is the case for the open-source polymerist repo) + MOLCHARGER_TYPES_TO_TEST.append(ABE10Charger) +if modules_installed('espaloma_charge'): + MOLCHARGER_TYPES_TO_TEST.append(EspalomaCharger) +if modules_installed('openff.nagl', 'openff.nagl_models'): + MOLCHARGER_TYPES_TO_TEST.append(NAGLCharger) +# MOLCHARGER_TYPES_TO_TEST.append( + # LibraryCharger # LibraryCharger behave differently to other MolCharger and are kind of a pain in the ass generally... +# ) # ...intend to deprecate and revamp them eventually, so will just exclude them from testing for now @pytest.mark.parametrize('molcharger_subclass', MOLCHARGER_TYPES_TO_TEST) def test_molchargers_assign_charges(offmol : Molecule, molcharger_subclass : type[MolCharger]) -> None: From e102159fa5fbe28c88b6f8aa4deb94ff8717e26b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 21:01:43 -0700 Subject: [PATCH 089/191] Added OpenFF Toolkit-specific exceptions to package requirement decorations --- .../openfftools/partialcharge/chargemethods.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py b/polymerist/mdtools/openfftools/partialcharge/chargemethods.py index 81b3cae..16494a1 100644 --- a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py +++ b/polymerist/mdtools/openfftools/partialcharge/chargemethods.py @@ -7,13 +7,12 @@ from collections import defaultdict from openff.toolkit.utils.base_wrapper import ToolkitWrapper -from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper 
from openff.toolkit.utils.builtin_wrapper import BuiltInToolkitWrapper +from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper from openff.toolkit.utils.ambertools_wrapper import AmberToolsToolkitWrapper -from openff import nagl_models -from openff.nagl import GNNModel +from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper from .. import REGISTERED_TKWRAPPER_TYPES @@ -53,9 +52,3 @@ if (tkwrapper_type in REGISTERED_TKWRAPPER_TYPES): # exclude non-registered toolkits to avoid confusion for method in supported_methods: TOOLKITS_BY_CHARGE_METHOD[method].append(tkwrapper_type) - - -## NAGL GNN Model -NAGL_MODEL_PATH = nagl_models.list_available_nagl_models()[1] # Path(/home/timber/miniconda3/envs/polymerist-env/lib/python3.11/site-packages/openff/nagl_models/models/openff-gnn-am1bcc-0.1.0-rc.1.pt) -NAGL_MODEL_PATH = nagl_models.validate_nagl_model_path(NAGL_MODEL_PATH) # double check that this model path is still one of the valid entry point -NAGL_MODEL = GNNModel.load(NAGL_MODEL_PATH) \ No newline at end of file From e51f1d330ae39cd0dee4ac7abb824a601a746393 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 21:40:29 -0700 Subject: [PATCH 090/191] Re-added OpenFF Toolkit-specific exceptions to package requirement decorations --- .../mdtools/openfftools/partialcharge/molchargers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/polymerist/mdtools/openfftools/partialcharge/molchargers.py b/polymerist/mdtools/openfftools/partialcharge/molchargers.py index 29bca2f..71a8192 100644 --- a/polymerist/mdtools/openfftools/partialcharge/molchargers.py +++ b/polymerist/mdtools/openfftools/partialcharge/molchargers.py @@ -11,6 +11,7 @@ from rdkit import Chem from openff.toolkit.topology.molecule import Molecule +from openff.toolkit.utils.exceptions import 
ToolkitUnavailableException from ....genutils.importutils.dependencies import requires_modules from ....genutils.decorators.functional import optional_in_place @@ -49,7 +50,7 @@ def charge_molecule(self, uncharged_mol : Molecule) -> None: # CONCRETE IMPLEMENTATIONS OF DIFFERENT CHARGING METHODS class ABE10Charger(MolCharger, CHARGING_METHOD= 'AM1-BCC-ELF10'): '''Charger class for AM1-BCC-ELF10 exact charging''' - @requires_modules('openeye.oechem', 'openeye.oeomega') # NOTE: just checking "openeye" doesn't work, as for whatever weird reason the toplevel openeye package has no module spec + @requires_modules('openeye.oechem', 'openeye.oeomega', missing_module_error=ToolkitUnavailableException) # for whatever weird reason the toplevel openeye package has no module spec, so just checking "openeye" isn't enough @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper @@ -61,19 +62,19 @@ def _charge_molecule(self, uncharged_mol : Molecule) -> None: class EspalomaCharger(MolCharger, CHARGING_METHOD='Espaloma-AM1-BCC'): '''Charger class for EspalomaCharge charging''' - @requires_modules('espaloma_charge') + @requires_modules('espaloma_charge', missing_module_error=ToolkitUnavailableException) @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper uncharged_mol.assign_partial_charges( - partial_charge_method='espaloma-am1bcc', # NOTE: this is actually the ONLY charge method the EspalomaChargeToolkitWrapper supports + partial_charge_method='espaloma-am1bcc', # this is actually the ONLY charge method the EspalomaChargeToolkitWrapper supports toolkit_registry=EspalomaChargeToolkitWrapper(), ) class NAGLCharger(MolCharger, CHARGING_METHOD='NAGL'): '''Charger class for NAGL charging''' - @requires_modules('openff.nagl', 'openff.nagl_models') + @requires_modules('openff.nagl', 
'openff.nagl_models', missing_module_error=ToolkitUnavailableException) @optional_in_place def _charge_molecule(self, uncharged_mol : Molecule) -> None: from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper From 0ec3e001deb3b772be53b5e69f849ef5ace4ca53 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 20 Nov 2024 21:40:49 -0700 Subject: [PATCH 091/191] Added provisional module for smart OpenFF ToolkitWrapper registration --- polymerist/mdtools/openfftools/_toolkits.py | 113 ++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 polymerist/mdtools/openfftools/_toolkits.py diff --git a/polymerist/mdtools/openfftools/_toolkits.py b/polymerist/mdtools/openfftools/_toolkits.py new file mode 100644 index 0000000..882a3a7 --- /dev/null +++ b/polymerist/mdtools/openfftools/_toolkits.py @@ -0,0 +1,113 @@ +'''For dynamically determining and cataloging which ToolkitWrappers (and accompanying functionality) are available''' + +# Subpackage-wide precheck to see if OpenFF is even usable in the first place +from ...genutils.importutils.dependencies import modules_installed +if not modules_installed('openff', 'openff.toolkit'): + raise ModuleNotFoundError( + f''' + OpenFF packages which are required to utilitize {__name__} not found in current environment + Please follow installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import + ''' + ) + +# Core OpenFF toolkit component registration +_REGISTER_TOOLKITS_TO_GLOBAL : bool = True # TODO: find way to avoid setting this config parameter directly in code + +from typing import Union +from collections import defaultdict + +from openff.toolkit.utils.utils import all_subclasses +from openff.toolkit.utils.exceptions import LicenseError, ToolkitUnavailableException +from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths + +from openff.toolkit.utils.base_wrapper import ToolkitWrapper +from 
openff.toolkit.utils.toolkit_registry import ToolkitRegistry +from openff.toolkit.utils.toolkits import ( + OPENEYE_AVAILABLE, + RDKIT_AVAILABLE, + AMBERTOOLS_AVAILABLE, + GLOBAL_TOOLKIT_REGISTRY as GTR, +) + +def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[ToolkitWrapper]], toolkit_registry : ToolkitRegistry=GTR) -> bool: + '''Check whether a ToolkitRegistry instance has already registered a given ToolkitWrapper subclass''' + if not isinstance(toolkit_wrapper, type): # ToolkitWrapper TYPES are needed for this check; any instances therefore... + toolkit_wrapper = type(toolkit_wrapper) # ...will have their respective types extracted + if not issubclass(toolkit_wrapper, ToolkitWrapper): + raise TypeError(f'Expected a ToolkitWrapper instance or subclass, instead received object of type {toolkit_wrapper.__name__}') + + return any(isinstance(tkwrapper, toolkit_wrapper) for tkwrapper in toolkit_registry.registered_toolkits) + + +# Setup of containers for OpenFF module info +## ToolkitWrapper reference +ALL_IMPORTABLE_TKWRAPPERS : list[type[ToolkitWrapper]] = all_subclasses(ToolkitWrapper) # NOTE: just because you can import these, doesn't necessarily mean they can be instantiated +ALL_AVAILABLE_TKWRAPPERS : list[type[ToolkitWrapper]] = [] + +TKWRAPPERS : dict[str, ToolkitWrapper ] = {} # TODO: populate these! +TKWRAPPER_TYPES : dict[str, type[ToolkitWrapper]] = {} # TODO: populate these! 
+POLYMERIST_TOOLKIT_REGISTRY = ToolkitRegistry() # retain a local registry separate from GLOBAL_TOOLKIT_REGISTRY + +## Partial charge method reference +CHARGE_METHODS_BY_TOOLKIT : dict[type[ToolkitWrapper], list[str]] = defaultdict(list) +TOOLKITS_BY_CHARGE_METHOD : dict[str, list[type[ToolkitWrapper]]] = defaultdict(list) # also compile inverse mapping (compiled once available toolkits are known) + + +# Toolkit-specific registrations which depend on available packages +## BuiltIn (not particularly useful in and of itself, but nice to know it's accessible) +if modules_installed('openff.toolkit'): # this check is idempotent to initial OpenFF check, but is nice to have for consistency between all ToolkitWrappers below + from openff.toolkit.utils.builtin_wrapper import BuiltInToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(BuiltInToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(BuiltInToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[BuiltInToolkitWrapper] = [charge_method for charge_method in BuiltInToolkitWrapper._supported_charge_methods] + +## RDKit +if modules_installed('rdkit') and RDKIT_AVAILABLE: + from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(RDKitToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(RDKitToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[RDKitToolkitWrapper] = [charge_method for charge_method in RDKitToolkitWrapper._supported_charge_methods] + +## Ambertools +if modules_installed('ambertools') and AMBERTOOLS_AVAILABLE: + from openff.toolkit.utils.ambertools_wrapper import AmberToolsToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(AmberToolsToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(AmberToolsToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[AmberToolsToolkitWrapper] = [charge_method for charge_method in AmberToolsToolkitWrapper._supported_charge_methods] + +## OpenEye +if modules_installed('openeye.oechem', 'openeye.oeomega') and OPENEYE_AVAILABLE: + 
from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(OpenEyeToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(OpenEyeToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[OpenEyeToolkitWrapper] = [charge_method for charge_method in OpenEyeToolkitWrapper._supported_charge_methods] + +## NAGL - extracting available charge methods is a little different for GNN toolkits +if modules_installed('openff.nagl', 'openff.nagl_models'): + from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(NAGLToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(NAGLToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[NAGLToolkitWrapper] = [model_path.name for model_path in NAGLToolkitWrapper.list_available_nagl_models()] # need to extract dynamically from Paths + +## Espaloma - extracting available charge methods is a little different for GNN toolkits +if modules_installed('espaloma_charge'): + from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper + + ALL_AVAILABLE_TKWRAPPERS.append(EspalomaChargeToolkitWrapper) + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(EspalomaChargeToolkitWrapper()) + CHARGE_METHODS_BY_TOOLKIT[EspalomaChargeToolkitWrapper] = ['espaloma-am1bcc'] # this is, at this of writing, the only available method for EspalomaCharge and unfortunately not accessible dynamically + +# Post-registration info compilation +## Compiling registry of which partial charge methods are supported by which toolkits +for tkwrapper_type, supported_methods in TOOLKITS_BY_CHARGE_METHOD.items(): + if (tkwrapper_type in ALL_AVAILABLE_TKWRAPPERS): # exclude non-registered toolkits to avoid confusion + for method in supported_methods: + TOOLKITS_BY_CHARGE_METHOD[method].append(tkwrapper_type) +TOOLKITS_BY_CHARGE_METHOD = dict(TOOLKITS_BY_CHARGE_METHOD) # convert to pure dict for typing purposes + +# TODO: add optional mirror into GTR if specified \ No newline at end of file 
From 844fa03e2e46f2211a54946b843358e773a1e656 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 21 Nov 2024 17:11:59 -0700 Subject: [PATCH 092/191] Reworked type hints to deprecate external dependency on typetools --- polymerist/genutils/decorators/meta.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/polymerist/genutils/decorators/meta.py b/polymerist/genutils/decorators/meta.py index 082dfd6..bedfc0e 100644 --- a/polymerist/genutils/decorators/meta.py +++ b/polymerist/genutils/decorators/meta.py @@ -6,8 +6,9 @@ from typing import Concatenate, Callable, ParamSpec, TypeAlias, TypeVar from functools import update_wrapper, wraps -from ..typetools.parametric import C, O, P, R, Args, KWArgs -Decorator : TypeAlias = Callable[[Callable[P, R]], Callable[P, R]] +Params = ParamSpec('Params') # can also use to typehint *args and **kwargs +ReturnType = TypeVar('ReturnType') +Decorator : TypeAlias = Callable[[Callable[Params, ReturnType]], Callable[Params, ReturnType]] # META DECORATORS @@ -18,16 +19,16 @@ def extend_to_methods(dec : Decorator) -> Decorator: @wraps(dec, updated=()) # transfer decorator signature to decorator adapter class, without updating the __dict__ field class AdaptedDecorator: - def __init__(self, funct : Callable[P, R]) -> None: + def __init__(self, funct : Callable[Params, ReturnType]) -> None: '''Record function''' self.funct = funct update_wrapper(self, funct) # equivalent to functools.wraps, transfers docstring, module, etc. 
for documentation - def __call__(self, *args : Args, **kwargs : KWArgs) -> ReturnSignature: # TODO : fix this to reflect the decorator's return signature + def __call__(self, *args : Params.args, **kwargs : Params.kwargs) -> ReturnSignature: # TODO : fix this to reflect the decorator's return signature '''Apply decorator to function, then call decorated function''' return dec(self.funct)(*args, **kwargs) - def __get__(self, instance : O, owner : C) -> Callable[[Concatenate[O, P]], R]: + def __get__(self, instance : object, owner : type) -> Callable[[Concatenate[object, Params]], ReturnType]: '''Generate partial application with calling instance as first argument (fills in for "self")''' method = self.funct.__get__(instance, owner) # look up method belonging to owner class return dec(method) # return the decorated method From 9dda6bfecc6281a3b717d061a5dcb5412e7af19a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 21 Nov 2024 17:25:10 -0700 Subject: [PATCH 093/191] Removed dependency on typetools --- polymerist/genutils/decorators/functional.py | 23 ++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/polymerist/genutils/decorators/functional.py b/polymerist/genutils/decorators/functional.py index 5504c93..a5999f7 100644 --- a/polymerist/genutils/decorators/functional.py +++ b/polymerist/genutils/decorators/functional.py @@ -3,7 +3,10 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Callable, Iterable, Optional, Type, Union +from typing import Callable, Concatenate, Iterable, Iterator, Optional, ParamSpec, TypeVar, Union + +T = TypeVar('T') +Params = ParamSpec('Params') from inspect import signature, Parameter from functools import wraps, partial @@ -13,20 +16,18 @@ from .meta import extend_to_methods from . 
import signatures -from ..typetools.parametric import T, Args, KWArgs -from ..typetools.categorical import ListLike from ..fileutils.pathutils import aspath, asstrpath @extend_to_methods -def optional_in_place(funct : Callable[[object, Args, KWArgs], None]) -> Callable[[object, Args, bool, KWArgs], Optional[object]]: +def optional_in_place(funct : Callable[[Concatenate[object, Params]], None]) -> Callable[[Concatenate[object, Params]], Optional[object]]: '''Decorator function for allowing in-place (writeable) functions which modify object attributes to be not performed in-place (i.e. read-only), specified by a boolean flag''' # TODO : add assertion that the wrapped function has at least one arg AND that the first arg is of the desired (limited) type old_sig = signature(funct) @wraps(funct) # for preserving docstring and type annotations / signatures - def in_place_wrapper(obj : object, *args : Args, in_place : bool=False, **kwargs : KWArgs) -> Optional[object]: # read-only by default + def in_place_wrapper(obj : object, *args : Params.args, in_place : bool=False, **kwargs : Params.kwargs) -> Optional[object]: # read-only by default '''If not in-place, create a clone on which the method is executed''' # NOTE : old_sig.bind screws up arg passing if in_place: funct(obj, *args, **kwargs) # default call to writeable method - implicitly returns None @@ -54,9 +55,9 @@ def in_place_wrapper(obj : object, *args : Args, in_place : bool=False, **kwargs return in_place_wrapper # TODO : implement support for extend_to_methods (current mechanism is broken by additional deocrator parameters) -def flexible_listlike_input(funct : Callable[[ListLike], T]=None, CastType : Type[ListLike]=list, valid_member_types : Union[Type, tuple[Type]]=object) -> Callable[[Iterable], T]: +def flexible_listlike_input(funct : Callable[[Iterator], T]=None, CastType : type[Iterator]=list, valid_member_types : Union[type, tuple[type]]=object) -> Callable[[Iterable], T]: '''Wrapper which allows a function 
which expects a single list-initializable, Container-like object to accept any Iterable (or even star-unpacked arguments)''' - if not issubclass(CastType, ListLike): + if not issubclass(CastType, Iterator): raise TypeError(f'Cannot wrap listlike input with non-listlike type "{CastType.__name__}"') @wraps(funct) @@ -79,13 +80,13 @@ def wrapper(*args) -> T: # wrapper which accepts an arbitrary number of non-keyw return wrapper @extend_to_methods -def allow_string_paths(funct : Callable[[Path, Args, KWArgs], T]) -> Callable[[Union[Path, str], Args, KWArgs], T]: +def allow_string_paths(funct : Callable[[Concatenate[Path, Params]], T]) -> Callable[[Concatenate[Union[Path, str], Params]], T]: '''Modifies a function which expects a Path as its first argument to also accept string-paths''' # TODO : add assertion that the wrapped function has at least one arg AND that the first arg is of the desired (limited) type old_sig = signature(funct) # lookup old type signature @wraps(funct) # for preserving docstring and type annotations / signatures - def str_path_wrapper(flex_path : Union[str, Path], *args : Args, **kwargs : KWArgs) -> T: + def str_path_wrapper(flex_path : Union[str, Path], *args : Params.args, **kwargs : Params.kwargs) -> T: '''First converts stringy paths into normal Paths, then executes the original function''' return funct(aspath(flex_path), *args, **kwargs) @@ -99,13 +100,13 @@ def str_path_wrapper(flex_path : Union[str, Path], *args : Args, **kwargs : KWAr return str_path_wrapper @extend_to_methods -def allow_pathlib_paths(funct : Callable[[str, Args, KWArgs], T]) -> Callable[[Union[Path, str], Args, KWArgs], T]: +def allow_pathlib_paths(funct : Callable[[Concatenate[str, Params]], T]) -> Callable[[Concatenate[Union[Path, str], Params]], T]: '''Modifies a function which expects a string path as its first argument to also accept canonical pathlib Paths''' # TODO : add assertion that the wrapped function has at least one arg AND that the first arg is of the 
desired (limited) type old_sig = signature(funct) # lookup old type signature @wraps(funct) # for preserving docstring and type annotations / signatures - def str_path_wrapper(flex_path : Union[str, Path], *args : Args, **kwargs : KWArgs) -> T: + def str_path_wrapper(flex_path : Union[str, Path], *args : Params.args, **kwargs : Params.kwargs) -> T: '''First converts normal Paths into stringy paths, then executes the original function''' return funct(asstrpath(flex_path), *args, **kwargs) From 4943e10fb536d7f8329b0a4066cb7e8fffef28ea Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 14:18:53 -0700 Subject: [PATCH 094/191] Fixed ambertools package check, TOOLKITS_BY_CHARGE_METHOD population loop --- polymerist/mdtools/openfftools/_toolkits.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/polymerist/mdtools/openfftools/_toolkits.py b/polymerist/mdtools/openfftools/_toolkits.py index 882a3a7..6d71a60 100644 --- a/polymerist/mdtools/openfftools/_toolkits.py +++ b/polymerist/mdtools/openfftools/_toolkits.py @@ -11,8 +11,6 @@ ) # Core OpenFF toolkit component registration -_REGISTER_TOOLKITS_TO_GLOBAL : bool = True # TODO: find way to avoid setting this config parameter directly in code - from typing import Union from collections import defaultdict @@ -29,6 +27,7 @@ GLOBAL_TOOLKIT_REGISTRY as GTR, ) +_REGISTER_TOOLKITS_TO_GLOBAL : bool = True # TODO: find way to avoid setting this config parameter directly in code def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[ToolkitWrapper]], toolkit_registry : ToolkitRegistry=GTR) -> bool: '''Check whether a ToolkitRegistry instance has already registered a given ToolkitWrapper subclass''' if not isinstance(toolkit_wrapper, type): # ToolkitWrapper TYPES are needed for this check; any instances therefore... 
@@ -71,7 +70,7 @@ def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[T CHARGE_METHODS_BY_TOOLKIT[RDKitToolkitWrapper] = [charge_method for charge_method in RDKitToolkitWrapper._supported_charge_methods] ## Ambertools -if modules_installed('ambertools') and AMBERTOOLS_AVAILABLE: +if modules_installed('pdb4amber') and AMBERTOOLS_AVAILABLE: # turns out "ambertools" can't actually be imported as a module, need to check for peripheral modules which are better behaved instead from openff.toolkit.utils.ambertools_wrapper import AmberToolsToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(AmberToolsToolkitWrapper) @@ -104,7 +103,7 @@ def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[T # Post-registration info compilation ## Compiling registry of which partial charge methods are supported by which toolkits -for tkwrapper_type, supported_methods in TOOLKITS_BY_CHARGE_METHOD.items(): +for tkwrapper_type, supported_methods in CHARGE_METHODS_BY_TOOLKIT.items(): if (tkwrapper_type in ALL_AVAILABLE_TKWRAPPERS): # exclude non-registered toolkits to avoid confusion for method in supported_methods: TOOLKITS_BY_CHARGE_METHOD[method].append(tkwrapper_type) From 4062ab4e0c74e601de4ac3011df2b951751b1116 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:21:58 -0700 Subject: [PATCH 095/191] Added dynamic ToolkitRegistry and name-based toolkit wrapper registration --- polymerist/mdtools/openfftools/_toolkits.py | 76 ++++++++++++--------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/polymerist/mdtools/openfftools/_toolkits.py b/polymerist/mdtools/openfftools/_toolkits.py index 6d71a60..38ebd2e 100644 --- a/polymerist/mdtools/openfftools/_toolkits.py +++ b/polymerist/mdtools/openfftools/_toolkits.py @@ -24,9 +24,12 @@ OPENEYE_AVAILABLE, RDKIT_AVAILABLE, AMBERTOOLS_AVAILABLE, - GLOBAL_TOOLKIT_REGISTRY as GTR, + GLOBAL_TOOLKIT_REGISTRY, ) +GTR = GLOBAL_TOOLKIT_REGISTRY # alias for brevity + +# Config 
and utility functions _REGISTER_TOOLKITS_TO_GLOBAL : bool = True # TODO: find way to avoid setting this config parameter directly in code def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[ToolkitWrapper]], toolkit_registry : ToolkitRegistry=GTR) -> bool: '''Check whether a ToolkitRegistry instance has already registered a given ToolkitWrapper subclass''' @@ -37,20 +40,10 @@ def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[T return any(isinstance(tkwrapper, toolkit_wrapper) for tkwrapper in toolkit_registry.registered_toolkits) - -# Setup of containers for OpenFF module info -## ToolkitWrapper reference +# Setup of initial containers for OpenFF module info ALL_IMPORTABLE_TKWRAPPERS : list[type[ToolkitWrapper]] = all_subclasses(ToolkitWrapper) # NOTE: just because you can import these, doesn't necessarily mean they can be instantiated ALL_AVAILABLE_TKWRAPPERS : list[type[ToolkitWrapper]] = [] - -TKWRAPPERS : dict[str, ToolkitWrapper ] = {} # TODO: populate these! -TKWRAPPER_TYPES : dict[str, type[ToolkitWrapper]] = {} # TODO: populate these! 
-POLYMERIST_TOOLKIT_REGISTRY = ToolkitRegistry() # retain a local registry separate from GLOBAL_TOOLKIT_REGISTRY - -## Partial charge method reference CHARGE_METHODS_BY_TOOLKIT : dict[type[ToolkitWrapper], list[str]] = defaultdict(list) -TOOLKITS_BY_CHARGE_METHOD : dict[str, list[type[ToolkitWrapper]]] = defaultdict(list) # also compile inverse mapping (compiled once available toolkits are known) - # Toolkit-specific registrations which depend on available packages ## BuiltIn (not particularly useful in and of itself, but nice to know it's accessible) @@ -58,55 +51,76 @@ def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[T from openff.toolkit.utils.builtin_wrapper import BuiltInToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(BuiltInToolkitWrapper) - POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(BuiltInToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[BuiltInToolkitWrapper] = [charge_method for charge_method in BuiltInToolkitWrapper._supported_charge_methods] - + CHARGE_METHODS_BY_TOOLKIT[BuiltInToolkitWrapper] = [ + charge_method + for charge_method in BuiltInToolkitWrapper._supported_charge_methods + ] ## RDKit if modules_installed('rdkit') and RDKIT_AVAILABLE: from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(RDKitToolkitWrapper) - POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(RDKitToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[RDKitToolkitWrapper] = [charge_method for charge_method in RDKitToolkitWrapper._supported_charge_methods] - + CHARGE_METHODS_BY_TOOLKIT[RDKitToolkitWrapper] = [ + charge_method + for charge_method in RDKitToolkitWrapper._supported_charge_methods + ] ## Ambertools if modules_installed('pdb4amber') and AMBERTOOLS_AVAILABLE: # turns out "ambertools" can't actually be imported as a module, need to check for peripheral modules which are better behaved instead from openff.toolkit.utils.ambertools_wrapper import AmberToolsToolkitWrapper 
ALL_AVAILABLE_TKWRAPPERS.append(AmberToolsToolkitWrapper) - POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(AmberToolsToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[AmberToolsToolkitWrapper] = [charge_method for charge_method in AmberToolsToolkitWrapper._supported_charge_methods] - + CHARGE_METHODS_BY_TOOLKIT[AmberToolsToolkitWrapper] = [ + charge_method + for charge_method in AmberToolsToolkitWrapper._supported_charge_methods + ] ## OpenEye if modules_installed('openeye.oechem', 'openeye.oeomega') and OPENEYE_AVAILABLE: from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(OpenEyeToolkitWrapper) - POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(OpenEyeToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[OpenEyeToolkitWrapper] = [charge_method for charge_method in OpenEyeToolkitWrapper._supported_charge_methods] - + CHARGE_METHODS_BY_TOOLKIT[OpenEyeToolkitWrapper] = [ + charge_method + for charge_method in OpenEyeToolkitWrapper._supported_charge_methods + ] ## NAGL - extracting available charge methods is a little different for GNN toolkits if modules_installed('openff.nagl', 'openff.nagl_models'): from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(NAGLToolkitWrapper) - POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(NAGLToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[NAGLToolkitWrapper] = [model_path.name for model_path in NAGLToolkitWrapper.list_available_nagl_models()] # need to extract dynamically from Paths - + CHARGE_METHODS_BY_TOOLKIT[NAGLToolkitWrapper] = [ + model_path.name # need to extract dynamically from Paths + for model_path in NAGLToolkitWrapper.list_available_nagl_models() + ] ## Espaloma - extracting available charge methods is a little different for GNN toolkits if modules_installed('espaloma_charge'): from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper ALL_AVAILABLE_TKWRAPPERS.append(EspalomaChargeToolkitWrapper) - 
POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(EspalomaChargeToolkitWrapper()) - CHARGE_METHODS_BY_TOOLKIT[EspalomaChargeToolkitWrapper] = ['espaloma-am1bcc'] # this is, at this of writing, the only available method for EspalomaCharge and unfortunately not accessible dynamically + CHARGE_METHODS_BY_TOOLKIT[EspalomaChargeToolkitWrapper] = [ + 'espaloma-am1bcc' + ] # this is, at this of writing, the only available method for EspalomaCharge and unfortunately not accessible dynamically # Post-registration info compilation +## Compiling name-based lookups for available ToolkitWrappers and registering all to a local +POLYMERIST_TOOLKIT_REGISTRY = ToolkitRegistry() # retain a local registry separate from GLOBAL_TOOLKIT_REGISTRY +TKWRAPPERS : dict[str, ToolkitWrapper ] = {} +TKWRAPPER_TYPES : dict[str, type[ToolkitWrapper]] = {} + +for tkwrapper_type in ALL_AVAILABLE_TKWRAPPERS: + tkwrapper_instance = tkwrapper_type() # instantiate toolkit wrapper class + POLYMERIST_TOOLKIT_REGISTRY.register_toolkit(tkwrapper_instance) + # if requested, also mirror all found toolkits to the Global ToolkitRegistry + if _REGISTER_TOOLKITS_TO_GLOBAL and not toolkit_wrapper_is_registered(tkwrapper_type, GLOBAL_TOOLKIT_REGISTRY): # make registration idempotent + GLOBAL_TOOLKIT_REGISTRY.register_toolkit(tkwrapper_instance) + + # register to name-based lookup dict + TKWRAPPERS[ tkwrapper_type._toolkit_name] = tkwrapper_instance + TKWRAPPER_TYPES[tkwrapper_type._toolkit_name] = tkwrapper_type + ## Compiling registry of which partial charge methods are supported by which toolkits +TOOLKITS_BY_CHARGE_METHOD : dict[str, list[type[ToolkitWrapper]]] = defaultdict(list) # also compile inverse mapping (compiled once available toolkits are known) for tkwrapper_type, supported_methods in CHARGE_METHODS_BY_TOOLKIT.items(): if (tkwrapper_type in ALL_AVAILABLE_TKWRAPPERS): # exclude non-registered toolkits to avoid confusion for method in supported_methods: 
TOOLKITS_BY_CHARGE_METHOD[method].append(tkwrapper_type) TOOLKITS_BY_CHARGE_METHOD = dict(TOOLKITS_BY_CHARGE_METHOD) # convert to pure dict for typing purposes - -# TODO: add optional mirror into GTR if specified \ No newline at end of file From 589c495d5d4950fc6248c7dbc3f37317ce806440 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:26:40 -0700 Subject: [PATCH 096/191] Added submodule for SMIRNOFF force field registrations --- .../mdtools/openfftools/_forcefields.py | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 polymerist/mdtools/openfftools/_forcefields.py diff --git a/polymerist/mdtools/openfftools/_forcefields.py b/polymerist/mdtools/openfftools/_forcefields.py new file mode 100644 index 0000000..2f38556 --- /dev/null +++ b/polymerist/mdtools/openfftools/_forcefields.py @@ -0,0 +1,24 @@ +'''For dynamically determining and cataloging which SMIRNOFF-copatible force fields are installed (and accompanying functionality) are available''' + +from typing import Optional +from pathlib import Path + +from ...genutils.importutils.dependencies import modules_installed + + +# Force field and ToolkitWrapper reference +FFDIR : Optional[Path] = None +FF_DIR_REGISTRY : dict[Path, Path] = {} +FF_PATH_REGISTRY : dict[Path, Path] = {} + +if modules_installed('openff.toolkit', 'openforcefields'): + import openforcefields + from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths + + FFDIR = Path(openforcefields.get_forcefield_dirs_paths()[0]) # Locate path where OpenFF forcefields are installed + for ffdir_str in _get_installed_offxml_dir_paths(): + ffdir = Path(ffdir_str) + ffdir_name = ffdir.parent.stem + + FF_DIR_REGISTRY[ ffdir_name] = ffdir + FF_PATH_REGISTRY[ffdir_name] = [path for path in ffdir.glob('*.offxml')] \ No newline at end of file From fbd98c15a7c46dbb239baf956d611ab45ecb3d12 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:32:11 -0700 Subject: [PATCH 
097/191] Relativized imports, broke up logic blocks among respective OpenFF dependencies --- polymerist/mdtools/openfftools/_forcefields.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/polymerist/mdtools/openfftools/_forcefields.py b/polymerist/mdtools/openfftools/_forcefields.py index 2f38556..e5fb010 100644 --- a/polymerist/mdtools/openfftools/_forcefields.py +++ b/polymerist/mdtools/openfftools/_forcefields.py @@ -8,14 +8,16 @@ # Force field and ToolkitWrapper reference FFDIR : Optional[Path] = None +if modules_installed('openff.toolkit'): + from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths + + FFDIR = Path(get_forcefield_dirs_paths()[0]) # Locate path where OpenFF forcefields are installed + FF_DIR_REGISTRY : dict[Path, Path] = {} FF_PATH_REGISTRY : dict[Path, Path] = {} - -if modules_installed('openff.toolkit', 'openforcefields'): - import openforcefields - from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths +if modules_installed('openforcefields'): + from openforcefields import get_forcefield_dirs_paths - FFDIR = Path(openforcefields.get_forcefield_dirs_paths()[0]) # Locate path where OpenFF forcefields are installed for ffdir_str in _get_installed_offxml_dir_paths(): ffdir = Path(ffdir_str) ffdir_name = ffdir.parent.stem From aa2cd54f727ae0383f625c690090ffb1ec7b8dad Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:33:28 -0700 Subject: [PATCH 098/191] Inserted author and email tags --- polymerist/mdtools/openfftools/_forcefields.py | 3 +++ polymerist/mdtools/openfftools/_toolkits.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/polymerist/mdtools/openfftools/_forcefields.py b/polymerist/mdtools/openfftools/_forcefields.py index e5fb010..a4d64d9 100644 --- a/polymerist/mdtools/openfftools/_forcefields.py +++ b/polymerist/mdtools/openfftools/_forcefields.py @@ -1,5 +1,8 @@ '''For dynamically determining and 
cataloging which SMIRNOFF-copatible force fields are installed (and accompanying functionality) are available''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + from typing import Optional from pathlib import Path diff --git a/polymerist/mdtools/openfftools/_toolkits.py b/polymerist/mdtools/openfftools/_toolkits.py index 38ebd2e..edae150 100644 --- a/polymerist/mdtools/openfftools/_toolkits.py +++ b/polymerist/mdtools/openfftools/_toolkits.py @@ -1,5 +1,8 @@ '''For dynamically determining and cataloging which ToolkitWrappers (and accompanying functionality) are available''' +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + # Subpackage-wide precheck to see if OpenFF is even usable in the first place from ...genutils.importutils.dependencies import modules_installed if not modules_installed('openff', 'openff.toolkit'): From c1393acaea727a02f5ed2a010bc1124ad4d7d5c7 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:53:56 -0700 Subject: [PATCH 099/191] Deprecated chargemethods submodule in favor of direct registry import from _toolkits --- .../openfftools/partialcharge/__init__.py | 5 ++ .../partialcharge/chargemethods.py | 54 ------------------- 2 files changed, 5 insertions(+), 54 deletions(-) delete mode 100644 polymerist/mdtools/openfftools/partialcharge/chargemethods.py diff --git a/polymerist/mdtools/openfftools/partialcharge/__init__.py b/polymerist/mdtools/openfftools/partialcharge/__init__.py index c5612d8..9c9c334 100644 --- a/polymerist/mdtools/openfftools/partialcharge/__init__.py +++ b/polymerist/mdtools/openfftools/partialcharge/__init__.py @@ -2,3 +2,8 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' + +from .._toolkits import ( + CHARGE_METHODS_BY_TOOLKIT, + TOOLKITS_BY_CHARGE_METHOD, +) \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py b/polymerist/mdtools/openfftools/partialcharge/chargemethods.py 
deleted file mode 100644 index 16494a1..0000000 --- a/polymerist/mdtools/openfftools/partialcharge/chargemethods.py +++ /dev/null @@ -1,54 +0,0 @@ -'''Registry module for keeping track of which partial charging toolkit registries and related methods are available''' - -__author__ = 'Timotej Bernat' -__email__ = 'timotej.bernat@colorado.edu' - -from typing import Type -from collections import defaultdict - -from openff.toolkit.utils.base_wrapper import ToolkitWrapper -from openff.toolkit.utils.builtin_wrapper import BuiltInToolkitWrapper -from openff.toolkit.utils.rdkit_wrapper import RDKitToolkitWrapper -from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper -from openff.toolkit.utils.ambertools_wrapper import AmberToolsToolkitWrapper - -from openff.toolkit.utils.nagl_wrapper import NAGLToolkitWrapper -from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper - -from .. import REGISTERED_TKWRAPPER_TYPES - - -# REFERENCE MAPPING BETWEEN PARTIAL CHARGE METHODS AND SUPPORTING TOOLKIT WRAPPERS -SUPPORTED_PARTIAL_CHARGE_METHODS_BY_TOOLKIT : dict[Type[ToolkitWrapper], list[str]]= { # TOSELF : this unfortunately cannot be accessed dynamically as a attribute of each TollkitWrapper class - BuiltInToolkitWrapper : [ - 'zeros', - 'formal_charge', - ], - RDKitToolkitWrapper : [ - 'mmff94', - 'gasteiger' - ], - AmberToolsToolkitWrapper : [ - 'am1bcc', - 'am1-mulliken', - 'gasteiger', - ], - OpenEyeToolkitWrapper : [ - 'am1bcc', - 'am1-mulliken', - 'gasteiger', - 'mmff94', - 'am1bccnosymspt', - 'am1elf10', - 'am1bccelf10', - ], - EspalomaChargeToolkitWrapper : [ - 'espaloma-am1bcc' - ] -} - -TOOLKITS_BY_CHARGE_METHOD : dict[str, list[Type[ToolkitWrapper]]] = defaultdict(list) -for tkwrapper_type, supported_methods in SUPPORTED_PARTIAL_CHARGE_METHODS_BY_TOOLKIT.items(): - if (tkwrapper_type in REGISTERED_TKWRAPPER_TYPES): # exclude non-registered toolkits to avoid confusion - for method in supported_methods: - 
TOOLKITS_BY_CHARGE_METHOD[method].append(tkwrapper_type) From 61e3980f78cd300042b67734a009ac8f0406d709 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 15:56:30 -0700 Subject: [PATCH 100/191] Cleaned up openfftools subpackage-level __init__ with openff dependency precheck and relative registry improts --- polymerist/mdtools/openfftools/__init__.py | 86 +++++++-------------- polymerist/mdtools/openfftools/_toolkits.py | 19 +---- 2 files changed, 34 insertions(+), 71 deletions(-) diff --git a/polymerist/mdtools/openfftools/__init__.py b/polymerist/mdtools/openfftools/__init__.py index 5e86394..8f1ab65 100644 --- a/polymerist/mdtools/openfftools/__init__.py +++ b/polymerist/mdtools/openfftools/__init__.py @@ -1,60 +1,34 @@ -'''Tools for manipulating and extending OpenFF objects, and for interfacing with other tools and formats''' +'''Extensions, interfaces, and convenience methods built around the functionality in the OpenFF software stack''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Any -from pathlib import Path - -import openforcefields -from openff.toolkit import ToolkitRegistry -from openff.toolkit import GLOBAL_TOOLKIT_REGISTRY as GTR -from openff.toolkit.utils.base_wrapper import ToolkitWrapper -from openff.toolkit.utils.utils import all_subclasses -from openff.toolkit.utils.exceptions import LicenseError, ToolkitUnavailableException -from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths - -from openff.toolkit.utils.openeye_wrapper import OpenEyeToolkitWrapper -from espaloma_charge.openff_wrapper import EspalomaChargeToolkitWrapper -from openff.nagl.toolkits import NAGLRDKitToolkitWrapper, NAGLOpenEyeToolkitWrapper - - -# FORCE FIELD AND ToolkitWrapper REFERENCE -FFDIR = Path(openforcefields.get_forcefield_dirs_paths()[0]) # Locate path where OpenFF forcefields are installed -FF_DIR_REGISTRY : dict[Path, Path] = {} -FF_PATH_REGISTRY : dict[Path, Path] = 
{} -for ffdir_str in _get_installed_offxml_dir_paths(): - ffdir = Path(ffdir_str) - ffdir_name = ffdir.parent.stem - - FF_DIR_REGISTRY[ ffdir_name] = ffdir - FF_PATH_REGISTRY[ffdir_name] = [path for path in ffdir.glob('*.offxml')] - -# CHECKING FOR OpenEye -ALL_IMPORTABLE_TKWRAPPERS = all_subclasses(ToolkitWrapper) # References to every registered ToolkitWrapper and ToolkitRegistry -try: - _ = OpenEyeToolkitWrapper() - _OE_TKWRAPPER_IS_AVAILABLE = True - OEUnavailableException = None -except (LicenseError, ToolkitUnavailableException) as error: - _OE_TKWRAPPER_IS_AVAILABLE = False - OEUnavailableException = error # catch and record relevant error message for use (rather than trying to replicate it elsewhere) - -# Register OpenFF-compatible GNN ToolkitWrappers -GTR.register_toolkit(EspalomaChargeToolkitWrapper) -GTR.register_toolkit(NAGLRDKitToolkitWrapper) -if _OE_TKWRAPPER_IS_AVAILABLE: - GTR.register_toolkit(NAGLOpenEyeToolkitWrapper) - - -# GENERATE LOOKUP DICTS FOR EVERY REGISTERED ToolkitWrappers and ToolkitRegistry -REGISTERED_TKWRAPPER_TYPES = [type(tkwrapper) for tkwrapper in GTR.registered_toolkits] -TKWRAPPERS = { # NOTE : this must be done AFTER any new registrations to thr GTR (e.g. 
after registering GNN ToolkitWrappers) - tk_wrap.toolkit_name : tk_wrap - for tk_wrap in GTR.registered_toolkits -} -TKREGS = {} # individually register toolkit wrappers for cases where a registry must be passed -for tk_name, tk_wrap in TKWRAPPERS.items(): - tk_reg = ToolkitRegistry() - tk_reg.register_toolkit(tk_wrap) - TKREGS[tk_name] = tk_reg \ No newline at end of file +# Subpackage-wide precheck to see if OpenFF is even usable in the first place +from ...genutils.importutils.dependencies import modules_installed +if not modules_installed('openff', 'openff.toolkit'): + raise ModuleNotFoundError( + f''' + OpenFF packages which are required to utilitize {__name__} not found in current environment + Please follow installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import + ''' + ) + +# Import of toplevel OpenFF object registries +from ._forcefields import ( + FFDIR, + FF_DIR_REGISTRY, + FF_PATH_REGISTRY, +) +from ._toolkits import ( + ## toolkit registries + GLOBAL_TOOLKIT_REGISTRY, GTR, + POLYMERIST_TOOLKIT_REGISTRY, + ## catalogues of available toolkit wrappers + ALL_IMPORTABLE_TKWRAPPERS, + ALL_AVAILABLE_TKWRAPPERS, + TKWRAPPERS, + TKWRAPPER_TYPES, + ## registry of partial charge methods by + CHARGE_METHODS_BY_TOOLKIT, + TOOLKITS_BY_CHARGE_METHOD, +) \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/_toolkits.py b/polymerist/mdtools/openfftools/_toolkits.py index edae150..9e6bb40 100644 --- a/polymerist/mdtools/openfftools/_toolkits.py +++ b/polymerist/mdtools/openfftools/_toolkits.py @@ -3,24 +3,11 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -# Subpackage-wide precheck to see if OpenFF is even usable in the first place -from ...genutils.importutils.dependencies import modules_installed -if not modules_installed('openff', 'openff.toolkit'): - raise ModuleNotFoundError( - f''' - OpenFF packages which are required to utilitize {__name__} not 
found in current environment - Please follow installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import - ''' - ) - # Core OpenFF toolkit component registration from typing import Union from collections import defaultdict from openff.toolkit.utils.utils import all_subclasses -from openff.toolkit.utils.exceptions import LicenseError, ToolkitUnavailableException -from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths - from openff.toolkit.utils.base_wrapper import ToolkitWrapper from openff.toolkit.utils.toolkit_registry import ToolkitRegistry from openff.toolkit.utils.toolkits import ( @@ -31,9 +18,11 @@ ) GTR = GLOBAL_TOOLKIT_REGISTRY # alias for brevity - -# Config and utility functions +from ...genutils.importutils.dependencies import modules_installed _REGISTER_TOOLKITS_TO_GLOBAL : bool = True # TODO: find way to avoid setting this config parameter directly in code + + +# Helper functions def toolkit_wrapper_is_registered(toolkit_wrapper : Union[ToolkitWrapper, type[ToolkitWrapper]], toolkit_registry : ToolkitRegistry=GTR) -> bool: '''Check whether a ToolkitRegistry instance has already registered a given ToolkitWrapper subclass''' if not isinstance(toolkit_wrapper, type): # ToolkitWrapper TYPES are needed for this check; any instances therefore... 
From b4afb52e5349f2d9ab459be8dd11699979ed9d89 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 16:00:17 -0700 Subject: [PATCH 101/191] Fixed back-to-front imports for offxml getter functions --- polymerist/mdtools/openfftools/_forcefields.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/mdtools/openfftools/_forcefields.py b/polymerist/mdtools/openfftools/_forcefields.py index a4d64d9..58199fe 100644 --- a/polymerist/mdtools/openfftools/_forcefields.py +++ b/polymerist/mdtools/openfftools/_forcefields.py @@ -12,14 +12,14 @@ # Force field and ToolkitWrapper reference FFDIR : Optional[Path] = None if modules_installed('openff.toolkit'): - from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths + from openforcefields import get_forcefield_dirs_paths FFDIR = Path(get_forcefield_dirs_paths()[0]) # Locate path where OpenFF forcefields are installed FF_DIR_REGISTRY : dict[Path, Path] = {} FF_PATH_REGISTRY : dict[Path, Path] = {} if modules_installed('openforcefields'): - from openforcefields import get_forcefield_dirs_paths + from openff.toolkit.typing.engines.smirnoff.forcefield import _get_installed_offxml_dir_paths for ffdir_str in _get_installed_offxml_dir_paths(): ffdir = Path(ffdir_str) From a2d3881e07fd536b2e6700886654bc78b069504a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 19:11:43 -0700 Subject: [PATCH 102/191] Shunted openff.units dependency into mdtools.openfftools subpackage --- polymerist/mdtools/openfftools/solvation/physprops.py | 1 + polymerist/unitutils/dimensions.py | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/polymerist/mdtools/openfftools/solvation/physprops.py b/polymerist/mdtools/openfftools/solvation/physprops.py index 5db9a28..bea8dae 100644 --- a/polymerist/mdtools/openfftools/solvation/physprops.py +++ b/polymerist/mdtools/openfftools/solvation/physprops.py @@ -50,6 +50,7 @@ def number_density(density : 
Quantity, MW : Quantity) -> Quantity: return (density / MW) * AVOGADRO_CONSTANT_NA # NUMBER +@allow_openff_units def num_mols_in_box(mol : Union[Mol, Molecule, Topology], box_vol : Quantity, density : Quantity) -> int: '''Return the number of particles/molecules needed to fill a box of given volume to the specified density''' assert(is_volume(box_vol.unit)) diff --git a/polymerist/unitutils/dimensions.py b/polymerist/unitutils/dimensions.py index 15acd2a..c4276d9 100644 --- a/polymerist/unitutils/dimensions.py +++ b/polymerist/unitutils/dimensions.py @@ -5,9 +5,11 @@ from typing import Any, Union -from pint import Quantity as PintQuantity # this is also the base class for all OpenFF-style units from openmm.unit import Quantity, Unit, length_dimension -from .interop import allow_openmm_units, allow_openff_units +from pint import ( # this is also the base classes for all OpenFF-style units + Unit as PintUnit, + Quantity as PintQuantity, +) # CHECKING FOR AND REMOVING UNITS @@ -31,7 +33,6 @@ def strip_units(coords : Union[tuple, PintQuantity, Quantity]) -> tuple[float]: return coords # CHECKING DIMENSIONALITY -@allow_openff_units def is_volume(unit_val : Union[Unit, Quantity]) -> bool: '''Return whether a unit corresponds to a volume''' if isinstance(unit_val, Quantity): From 8bc73260a8cd3dc2521f6d47be42eeb84f130fc9 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 19:21:26 -0700 Subject: [PATCH 103/191] Created openfftools subpackage for OpenMM interoperability, absorbed relevant functionality in backwards-compatible manner --- polymerist/mdtools/openfftools/boxvectors.py | 2 +- polymerist/mdtools/openfftools/omminter/__init__.py | 12 ++++++++++++ .../{omminter.py => omminter/mdobjects.py} | 11 +++++------ .../openfftools/omminter/unitsys.py} | 2 +- .../mdtools/openfftools/solvation/physprops.py | 2 +- 5 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 polymerist/mdtools/openfftools/omminter/__init__.py rename 
polymerist/mdtools/openfftools/{omminter.py => omminter/mdobjects.py} (89%) rename polymerist/{unitutils/interop.py => mdtools/openfftools/omminter/unitsys.py} (94%) diff --git a/polymerist/mdtools/openfftools/boxvectors.py b/polymerist/mdtools/openfftools/boxvectors.py index 011a810..49ea028 100644 --- a/polymerist/mdtools/openfftools/boxvectors.py +++ b/polymerist/mdtools/openfftools/boxvectors.py @@ -14,7 +14,7 @@ from openff.toolkit import Topology from openff.interchange.components._packmol import _box_vectors_are_in_reduced_form -from ...unitutils.interop import allow_openmm_units, openff_to_openmm +from .omminter.unitsys import allow_openmm_units, openff_to_openmm # CUSTOM TYPES FOR CLARITY, ESPECIALLY WITH UNITS diff --git a/polymerist/mdtools/openfftools/omminter/__init__.py b/polymerist/mdtools/openfftools/omminter/__init__.py new file mode 100644 index 0000000..db12755 --- /dev/null +++ b/polymerist/mdtools/openfftools/omminter/__init__.py @@ -0,0 +1,12 @@ +'''For interfacing between OpenFF and OpenMM''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from .mdobjects import forcefield_flexible, openff_topology_to_openmm +from .unitsys import ( + openmm_to_openff, + openff_to_openmm, + allow_openmm_units, + allow_openff_units, +) \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/omminter.py b/polymerist/mdtools/openfftools/omminter/mdobjects.py similarity index 89% rename from polymerist/mdtools/openfftools/omminter.py rename to polymerist/mdtools/openfftools/omminter/mdobjects.py index 199b239..2ed2be9 100644 --- a/polymerist/mdtools/openfftools/omminter.py +++ b/polymerist/mdtools/openfftools/omminter/mdobjects.py @@ -1,4 +1,4 @@ -'''For interfacing between OpenFF and OpenMM representations, along with the file analogues''' +'''For interfacing between OpenFF and OpenMM representations of Topologies and other MD primitives''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' @@ 
-6,7 +6,6 @@ from typing import Optional, Union from pathlib import Path -from numpy import ndarray from openff.toolkit import ForceField from openff.interchange import Interchange @@ -16,12 +15,12 @@ from openmm.app import Topology as OMMTopology from openmm.unit import Quantity -from . import FFDIR -from .boxvectors import box_vectors_flexible, VectorQuantity, BoxVectorsQuantity -from ...unitutils.interop import openff_to_openmm +from .unitsys import openff_to_openmm +from .. import FFDIR +from ..boxvectors import box_vectors_flexible, VectorQuantity, BoxVectorsQuantity -def forcefield_flexible(forcefield : Union[ForceField, str, Path]) -> ForceField: +def forcefield_flexible(forcefield : Union[ForceField, str, Path]) -> ForceField: # DEV: consider deprecating '''For making forcefield input to other functions more flexible (can accept a literal ForceField, a string name, or a Path to the forcefield)''' if isinstance(forcefield, ForceField): return forcefield diff --git a/polymerist/unitutils/interop.py b/polymerist/mdtools/openfftools/omminter/unitsys.py similarity index 94% rename from polymerist/unitutils/interop.py rename to polymerist/mdtools/openfftools/omminter/unitsys.py index b36b7d3..46ff0c4 100644 --- a/polymerist/unitutils/interop.py +++ b/polymerist/mdtools/openfftools/omminter/unitsys.py @@ -1,4 +1,4 @@ -'''Decorators for handling interconversion between the OpenMM and OpenFF (Pint) unit engines''' +'''For handling interconversion between the OpenMM and OpenFF (Pint) unit engines''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/mdtools/openfftools/solvation/physprops.py b/polymerist/mdtools/openfftools/solvation/physprops.py index bea8dae..1de70a2 100644 --- a/polymerist/mdtools/openfftools/solvation/physprops.py +++ b/polymerist/mdtools/openfftools/solvation/physprops.py @@ -15,7 +15,7 @@ from openff.units import Quantity as OFFQuantity from ....unitutils.dimensions import is_volume -from 
....unitutils.interop import allow_openff_units, openff_to_openmm +from ..omminter.unitsys import allow_openff_units, openff_to_openmm # MASS From 43836a32bb9963541aa42ff78541b22280bb4db8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 19:22:50 -0700 Subject: [PATCH 104/191] Added unit test placeholder for omminter --- polymerist/tests/mdtools/openfftools/omminter/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 polymerist/tests/mdtools/openfftools/omminter/__init__.py diff --git a/polymerist/tests/mdtools/openfftools/omminter/__init__.py b/polymerist/tests/mdtools/openfftools/omminter/__init__.py new file mode 100644 index 0000000..e906b2b --- /dev/null +++ b/polymerist/tests/mdtools/openfftools/omminter/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `omminter` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' From 1f8fb8dbb3c48c27bc5e7698449cd50cb6ac5c6c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 20:35:17 -0700 Subject: [PATCH 105/191] Simplified and clarified typehints --- .../mdtools/openfftools/omminter/unitsys.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/polymerist/mdtools/openfftools/omminter/unitsys.py b/polymerist/mdtools/openfftools/omminter/unitsys.py index 46ff0c4..ae82d1e 100644 --- a/polymerist/mdtools/openfftools/omminter/unitsys.py +++ b/polymerist/mdtools/openfftools/omminter/unitsys.py @@ -4,37 +4,37 @@ __email__ = 'timotej.bernat@colorado.edu' from typing import Callable, TypeVar -R = TypeVar('R') # for representing generic return values -Q = TypeVar('Q') # for representing generic Quantity-like objects -from openmm.unit import Quantity +ReturnType = TypeVar('ReturnType') + +from openmm.unit import Quantity as OpenMMQuantity from pint import Quantity as PintQuantity # this is also the base class for all OpenFF-style units -from openff.units import Quantity as OFFQuantity + from openff.units.openmm 
import ( from_openmm as openmm_to_openff, to_openmm as openff_to_openmm, ) -def allow_openmm_units(funct : Callable[[Q], R]) -> Callable[[Q], R]: - '''Allow a Callable which expects ALL of its args to be OpenFF Quantities to also accept equivalent OpenMM Quantites''' - def wrapper(*args, **kwargs) -> R: +def allow_openmm_units(funct : Callable[..., ReturnType]) -> Callable[..., ReturnType]: + '''Allow a Callable which expects any of its args to be OpenFF Quantities to also accept equivalent OpenMM Quantites''' + def wrapper(*args, **kwargs) -> ReturnType: new_args = [ - openmm_to_openff(arg) if isinstance(arg, Quantity) else arg + openmm_to_openff(arg) if isinstance(arg, OpenMMQuantity) else arg for arg in args ] new_kwargs = { - key : openmm_to_openff(kwarg) if isinstance(kwarg, Quantity) else kwarg + key : openmm_to_openff(kwarg) if isinstance(kwarg, OpenMMQuantity) else kwarg for key, kwarg in kwargs.items() } return funct(*new_args, **new_kwargs) return wrapper -def allow_openff_units(funct : Callable[[Q], R]) -> Callable[[Q], R]: - '''Allow a Callable which expects ALL of its args to be OpenMM Quantities to also accept equivalent OpenFF Quantites''' - def wrapper(*args, **kwargs) -> R: +def allow_openff_units(funct : Callable[..., ReturnType]) -> Callable[..., ReturnType]: + '''Allow a Callable which expects any of its args to be OpenMM Quantities to also accept equivalent OpenFF Quantites''' + def wrapper(*args, **kwargs) -> ReturnType: new_args = [ openff_to_openmm(arg) if isinstance(arg, PintQuantity) else arg for arg in args From c06c384dbb015d8011c6d09a4b08c987cb37ab93 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 20:36:01 -0700 Subject: [PATCH 106/191] Extended is_volume() to support Pint-style objects, clarified unit-like typehints --- polymerist/unitutils/dimensions.py | 53 +++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/polymerist/unitutils/dimensions.py 
b/polymerist/unitutils/dimensions.py index c4276d9..679a749 100644 --- a/polymerist/unitutils/dimensions.py +++ b/polymerist/unitutils/dimensions.py @@ -3,13 +3,25 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Any, Union +from typing import Any, Union, TypeVar +T = TypeVar('T') + +from numpy import ndarray +from openmm.unit import ( + Unit as OpenMMUnit, + Quantity as OpenMMQuantity, + length_dimension, +) +OpenMMUnitLike = Union[OpenMMUnit, OpenMMQuantity] # TODO: add union type checkers -from openmm.unit import Quantity, Unit, length_dimension from pint import ( # this is also the base classes for all OpenFF-style units Unit as PintUnit, Quantity as PintQuantity, ) +PintUnitLike = Union[PintUnit, PintQuantity] # TODO: add union type checkers + +Unit = Union[PintUnit , OpenMMUnit] +Quantity = Union[PintQuantity, OpenMMQuantity] # CHECKING FOR AND REMOVING UNITS @@ -20,28 +32,45 @@ def hasunits(obj : Any) -> bool: '''Naive but effective way of checking for pint and openmm units''' return any(hasattr(obj, attr) for attr in ('unit', 'units')) -def strip_units(coords : Union[tuple, PintQuantity, Quantity]) -> tuple[float]: +def strip_units(coords : Union[T, PintQuantity, OpenMMQuantity]) -> Union[T, ndarray[Any]]: ''' Sanitize coordinate tuples for cases which require unitless quantities Specifically needed since OpenMM and pint each have their own Quantity and Units classes ''' if isinstance(coords, PintQuantity): - return coords.magnitude - elif isinstance(coords, Quantity): + return coords.magnitude # for container-like values (e.g. tuples), will always return numpy array instead (not type-safe!) 
+ elif isinstance(coords, OpenMMQuantity): return coords._value return coords # CHECKING DIMENSIONALITY -def is_volume(unit_val : Union[Unit, Quantity]) -> bool: - '''Return whether a unit corresponds to a volume''' - if isinstance(unit_val, Quantity): - unit_val = unit_val.unit # extract just the unit component if a Quantity is passed +def _is_volume_openmm(unitlike : OpenMMUnitLike) -> bool: + '''Check whether an OpenMM Unit/Quantity dimensionally corresponds to a volume''' + if isinstance(unitlike, OpenMMQuantity): + unitlike = unitlike.unit # extract just the unit component if a Quantity is passed - for i, (dim, exp) in enumerate(unit_val.iter_base_dimensions()): + for i, (dim, exp) in enumerate(unitlike.iter_base_dimensions()): if i > 0: - return False # immediate rule out if more than just one unit is present + return False # immediate rule out if more than just one dimension is present if (dim == length_dimension) and (exp == 3.0): # if monodimensional, check that the single dimension is L^3 return True - return False \ No newline at end of file + return False + +def _is_volume_pint(unitlike : PintUnitLike) -> bool: + '''Check whether an Pint Unit/Quantity dimensionally corresponds to a volume''' + return unitlike.dimensionality == '[length]**3' # "dimensionality" attr is present on both the Unit and Quantity classes in Pint + +def is_volume(unitlike : Union[Unit, Quantity]) -> bool: + ''' + Check whether a Unit or Quantity dimensionally corresponds to a volume + Accepts both OpenMM-style and Pint-style unit-like objects + ''' + if isinstance(unitlike, OpenMMUnitLike): + return _is_volume_openmm(unitlike) + elif isinstance(unitlike, PintUnitLike): + return _is_volume_pint(unitlike) + else: + # raise TypeError(f'Cannot interpret object of type "{type(unitlike).__name__}" as unit-like') + return False # strictly speaking, anything which has no notion of units cannot be a volume \ No newline at end of file From 8f0fbde90d1643466d8926fdf6ca658a60690245 Mon Sep 
17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 20:36:24 -0700 Subject: [PATCH 107/191] Wrote unit tests for unitutils.dimensions --- polymerist/tests/unitutils/test_dimensions.py | 142 ++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 polymerist/tests/unitutils/test_dimensions.py diff --git a/polymerist/tests/unitutils/test_dimensions.py b/polymerist/tests/unitutils/test_dimensions.py new file mode 100644 index 0000000..e13720f --- /dev/null +++ b/polymerist/tests/unitutils/test_dimensions.py @@ -0,0 +1,142 @@ +'''Testing that dimensionality checking behaves as expected for both OpenMm and Pint-style unit systems''' + +from typing import Any, Union +from dataclasses import dataclass + +import pytest + +from openmm.unit import ( + Unit as OpenMMUnit, + Quantity as OpenMMQuantity, + centimeter, + second, +) +from pint import ( + Unit as PintUnit, + Quantity as PintQuantity, # this is also the base class for all OpenFF-style units + UnitRegistry, +) +ureg = UnitRegistry() + +from polymerist.unitutils.dimensions import ( + hasunits, + strip_units, + is_volume, +) + + +# Defining test cases and expected outputs +@dataclass +class UnitExample: + '''Internal encapsulation class for indicating expected + properties of unit-like objects in unit tests (no pun intended)''' + value : Any + has_units : bool + is_a_volume : bool # changed name slightly to obviate clash with is_volume() in namespace + +test_cases : list[UnitExample] = [ + # non-units + UnitExample( + value=42, + has_units=False, + is_a_volume=False, + ), + UnitExample( + value=3.1415, + has_units=False, + is_a_volume=False, + ), + UnitExample( + value={1,2,3}, + has_units=False, + is_a_volume=False, + ), + # pure units + UnitExample( + value=second, + has_units=False, + is_a_volume=False, + ), + UnitExample( + value=centimeter**3, + has_units=False, + is_a_volume=True, # despite being a pure unit, this should still count as a volume + ), + UnitExample( + value=ureg.second, + 
has_units=False, + is_a_volume=False, + ), + UnitExample( + value=ureg.foot**3, + has_units=False, + is_a_volume=True, + ), + # simple quantities + UnitExample( + value=1.1*second, + has_units=True, + is_a_volume=False, + ), + UnitExample( + value=1.2*centimeter, + has_units=True, + is_a_volume=False, + ), + UnitExample( + value=1.3*centimeter**3, + has_units=True, + is_a_volume=True, + ), + UnitExample( + value=2.1*ureg.second, + has_units=True, + is_a_volume=False, + ), + UnitExample( + value=2.2*ureg.centimeter, + has_units=True, + is_a_volume=False, + ), + UnitExample( + value=2.3*ureg.centimeter**3, + has_units=True, + is_a_volume=True, + ), + # mixed quantities + UnitExample( + value=9.8*centimeter*second**-1, + has_units=True, + is_a_volume=False, + ), + UnitExample( + value=9.81*ureg.centimeter*ureg.second**-1, + has_units=True, + is_a_volume=False, + ) +] + +# Unit tests +@pytest.mark.parametrize('unitlike, expected_output', [ + (unit_example.value, unit_example.has_units) + for unit_example in test_cases + ] +) +def test_hasunits(unitlike : Any, expected_output : bool) -> None: + '''Test that objects with (and without) units are correctly identified''' + assert hasunits(unitlike) == expected_output + +@pytest.mark.parametrize('unitlike, expected_output', [ + (unit_example.value, unit_example.is_a_volume) + for unit_example in test_cases + ] +) +def test_hasunits(unitlike : Any, expected_output : bool) -> None: + '''Test that objects which can (and can't) be interpreted as volumes are correctly identified as such''' + assert is_volume(unitlike) == expected_output + +SAMPLE_COORDS : tuple[float] = (1.23, 4.56, 7.89) # these numbers are arbitrary, but need to be consistent across tests +@pytest.mark.parametrize('coordlike', [SAMPLE_COORDS, SAMPLE_COORDS*centimeter, SAMPLE_COORDS*ureg.centimeter]) +def test_strip_units(coordlike : Union[tuple, PintQuantity, OpenMMQuantity]) -> None: + '''Test that removing units works for Pint, OpenMM, and unit-free 
objects''' + assert tuple(strip_units(coordlike)) == SAMPLE_COORDS # need to re-tuplify to counteract numpy auto-conversion by pint \ No newline at end of file From ae9e8b4f391e233ae0d8484f6c74a26cdc07c063 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 20:55:04 -0700 Subject: [PATCH 108/191] Added requirement for "pint" (the units package) --- devtools/conda-envs/release-build.yml | 1 + devtools/conda-envs/test-env.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/devtools/conda-envs/release-build.yml b/devtools/conda-envs/release-build.yml index 2038fa9..f50b3da 100644 --- a/devtools/conda-envs/release-build.yml +++ b/devtools/conda-envs/release-build.yml @@ -25,6 +25,7 @@ dependencies: - openmm - lammps - mdtraj + - pint # for units in case OpenFF is not installed # Molecule building - mbuild diff --git a/devtools/conda-envs/test-env.yml b/devtools/conda-envs/test-env.yml index b5f48f2..d721311 100644 --- a/devtools/conda-envs/test-env.yml +++ b/devtools/conda-envs/test-env.yml @@ -25,6 +25,7 @@ dependencies: - openmm - lammps - mdtraj + - pint # for units in case OpenFF is not installed # Molecule building - mbuild From 9f4b82cf70c5679c6f079c30baf158f06bdf6d39 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 21:00:01 -0700 Subject: [PATCH 109/191] Fixed indents on YAML header comments --- devtools/conda-envs/test-env.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/devtools/conda-envs/test-env.yml b/devtools/conda-envs/test-env.yml index d721311..70395f6 100644 --- a/devtools/conda-envs/test-env.yml +++ b/devtools/conda-envs/test-env.yml @@ -3,12 +3,12 @@ channels: - conda-forge - openeye dependencies: - # Basic Python dependencies + # Basic Python dependencies - python - pip - jupyterlab - # Testing and docs + # Testing and docs - pytest - pytest-cov - codecov From 00f5f9749967edf4d0bc261a4a6cbfdf34305860 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 2 Dec 2024 21:00:48 -0700 
Subject: [PATCH 110/191] Inverted import order to cause OpenFF installation error to raise prior to direct openff.toolkit imports --- .../mdtools/openfftools/partialcharge/test_molchargers.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py index 3b43d78..eb291f2 100644 --- a/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py +++ b/polymerist/tests/mdtools/openfftools/partialcharge/test_molchargers.py @@ -5,13 +5,15 @@ import pytest -from openff.toolkit import Molecule -from openff.toolkit.utils.toolkits import OPENEYE_AVAILABLE - +# NOTE: inverted custom imports here to get polymerist.openfftools import first, +# Done so that if OpenFF is not found, a helpful installation error will be raised prior to attempting direct openff.toolkit imports below from polymerist.genutils.importutils.dependencies import modules_installed from polymerist.mdtools.openfftools.partialcharge.molchargers import MolCharger, ABE10Charger, EspalomaCharger, NAGLCharger from polymerist.mdtools.openfftools.partialcharge.rescharge.interface import LibraryCharger +from openff.toolkit import Molecule +from openff.toolkit.utils.toolkits import OPENEYE_AVAILABLE + # Test MolCharger subclass registration def test_molcharger_registers_subclasses() -> None: From 31f79e3403e64f03b33c9c83da5d8e29af1e7109 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 17:44:46 -0700 Subject: [PATCH 111/191] Updated TypeSerializer and derived classes to use register_abstract_class_attr() paradigm for serialized type --- polymerist/genutils/fileutils/jsonio/jsonify.py | 4 +--- polymerist/genutils/fileutils/jsonio/serialize.py | 13 +++++-------- polymerist/polymers/monographs.py | 4 +--- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/polymerist/genutils/fileutils/jsonio/jsonify.py 
b/polymerist/genutils/fileutils/jsonio/jsonify.py index dab42a1..3354dc3 100644 --- a/polymerist/genutils/fileutils/jsonio/jsonify.py +++ b/polymerist/genutils/fileutils/jsonio/jsonify.py @@ -26,10 +26,8 @@ def dataclass_serializer_factory(cls : Type[C]) -> TypeSerializer: '''For generating a custom TypeSerializer for a JSONifiable dataclass''' assert(is_dataclass(cls)) # can enforce modification only to dataclasses (makes behavior a little more natural) - class DataclassSerializer(TypeSerializer): + class DataclassSerializer(TypeSerializer, python_type=cls): f'''JSON encoder and decoder for the {cls.__name__} dataclass''' - python_type = cls - @staticmethod def encode(python_obj : Path) -> dict[str, Any]: '''Extract dictionary of attributes (may need other external converters to be fully serialized)''' diff --git a/polymerist/genutils/fileutils/jsonio/serialize.py b/polymerist/genutils/fileutils/jsonio/serialize.py index efc01fb..9c0da57 100644 --- a/polymerist/genutils/fileutils/jsonio/serialize.py +++ b/polymerist/genutils/fileutils/jsonio/serialize.py @@ -12,7 +12,7 @@ import numpy as np import openmm.unit -from ...decorators.classmod import register_subclasses +from ...decorators.classmod import register_subclasses, register_abstract_class_attrs # CHECKING IF AN OBJECT IS SERIALIZABLE TO JSON BY DEFAULT @@ -20,9 +20,10 @@ # ABSTRACT INTERFACE FOR DEFINING CUSTOM SERIALIZERS (ENCODER + DECODER) @register_subclasses(key_attr='python_type') +@register_abstract_class_attrs('python_type') class TypeSerializer(ABC): '''Interface for defining how types which are not JSON serializable by default should be encoded and decoded''' - python_type : ClassVar[Type[T]] + python_type : ClassVar[Type[T]] # NOTE: this is kept here purely for static typehinting purposes @abstractstaticmethod def encode(python_obj : T) -> JSONSerializable: @@ -100,10 +101,8 @@ def decoder_hook(self, json_dict : dict[JSONSerializable, JSONSerializable]) -> # CONCRETE IMPLEMENTATIONS -class 
PathSerializer(TypeSerializer): +class PathSerializer(TypeSerializer, python_type=Path): '''For JSON-serializing OpenMM Quantities''' - python_type = Path - @staticmethod def encode(python_obj : Path) -> str: '''Separate openmm.unit.Quantity's value and units to serialize as a single dict''' @@ -114,10 +113,8 @@ def decode(json_obj : str) -> Path: '''Unpack a value-unit string dict back into a usable openmm.unit.Quantity''' return Path(json_obj) -class QuantitySerializer(TypeSerializer): +class QuantitySerializer(TypeSerializer, python_type=openmm.unit.Quantity): '''For JSON-serializing OpenMM Quantities''' - python_type = openmm.unit.Quantity - @staticmethod def encode(python_obj : openmm.unit.Quantity) -> dict[str, Union[str, float]]: '''Separate openmm.unit.Quantity's value and units to serialize as a single dict''' diff --git a/polymerist/polymers/monographs.py b/polymerist/polymers/monographs.py index 1c7bbe0..28f6fcd 100644 --- a/polymerist/polymers/monographs.py +++ b/polymerist/polymers/monographs.py @@ -163,10 +163,8 @@ def _passes_string_conversion_tests(self) -> tuple[bool, Optional[tuple[int]]]: return True, None MonoGraph = MonomerGraph # alias for convenience -class MonomerGraphSerializer(TypeSerializer): +class MonomerGraphSerializer(TypeSerializer, python_type=MonomerGraph): '''JSON serializer for storing MonomerGraphs as SMIDGE strings ''' - python_type = MonomerGraph - @staticmethod def encode(python_obj : MonomerGraph) -> str: return python_obj.to_smidge_string() From 6ecbf9afb3ea45fbd8c441cfa3ac5c90e7e2a313 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 17:50:46 -0700 Subject: [PATCH 112/191] Reimplemented RDConverter tagging with register_abstract_class_attr() --- polymerist/rdutils/rdconvert.py | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/polymerist/rdutils/rdconvert.py b/polymerist/rdutils/rdconvert.py index 89df721..6d65ac8 100644 --- a/polymerist/rdutils/rdconvert.py 
+++ b/polymerist/rdutils/rdconvert.py @@ -3,24 +3,20 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from abc import ABC, abstractmethod, abstractproperty +from abc import ABC, abstractmethod from rdkit import Chem from rdkit.Chem.rdchem import Mol -from ..genutils.decorators.classmod import register_subclasses +from ..genutils.decorators.classmod import register_subclasses, register_abstract_class_attrs from .labeling.bijection import bijective_atom_id_iter from .rdprops import copy_rd_props @register_subclasses(key_attr='TAG') +@register_abstract_class_attrs('TAG') class RDConverter(ABC): # TODO : add some optional sanitization measures to ensure valid output and bijection '''For converting an existing RDKit Molecule to and from a particular format to gain new properties''' - @abstractproperty - @classmethod - def TAG(cls): - pass - @abstractmethod def _convert(self, rdmol : Mol) -> Mol: '''Implement conversion mechanism here''' @@ -41,34 +37,29 @@ def convert(self, rdmol : Mol, sanitize : bool=True) -> Mol: return newmol -class SMARTSConverter(RDConverter): - TAG = 'SMARTS' +class SMARTSConverter(RDConverter, TAG='SMARTS'): def _convert(self, rdmol : Mol) -> Mol: return Chem.MolFromSmarts(Chem.MolToSmarts(rdmol)) -class SMILESConverter(RDConverter): - TAG = 'SMILES' +class SMILESConverter(RDConverter, TAG='SMILES'): def _convert(self, rdmol : Mol) -> Mol: return Chem.MolFromSmiles(Chem.MolToSmiles(rdmol), sanitize=False) -class CXSMARTSConverter(RDConverter): +class CXSMARTSConverter(RDConverter, TAG='CXSMARTS'): '''Similar to SMARTSConverter but preserves the 3D structure''' - TAG = 'CXSMARTS' def _convert(self, rdmol : Mol) -> Mol: return Chem.MolFromSmarts(Chem.MolToCXSmarts(rdmol)) -class CXSMILESConverter(RDConverter): +class CXSMILESConverter(RDConverter, TAG='CXSMILES'): '''Similar to SMILESConverter but preserves the 3D structure''' - TAG = 'CXSMILES' def _convert(self, rdmol : Mol) -> Mol: return 
Chem.MolFromSmiles(Chem.MolToCXSmiles(rdmol), sanitize=False) -class InChIConverter(RDConverter): # TOSELF : this does not preserve atom map num ordering (how to incorporate AuxInfo?) - TAG = 'InChI' +class InChIConverter(RDConverter, TAG='InChI'): + # TOSELF : this does not preserve atom map num ordering (how to incorporate AuxInfo?) def _convert(self, rdmol : Mol) -> Mol: return Chem.AddHs(Chem.MolFromInchi(Chem.MolToInchi(rdmol), removeHs=False, sanitize=False)) -class JSONConverter(RDConverter): - TAG = 'JSON' +class JSONConverter(RDConverter, TAG='JSON'): def _convert(self, rdmol : Mol) -> Mol: return Chem.rdMolInterchange.JSONToMols(Chem.MolToJSON(rdmol))[0] From b17f0d32d0667c5f1195baf705d9081030b6456d Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 18:03:28 -0700 Subject: [PATCH 113/191] Registered "ensemble" and "ensemble_name" class attrs with decorator, fixed NVE VerletIntegrator initialization bug --- polymerist/mdtools/openmmtools/thermo.py | 40 +++++------------------- 1 file changed, 7 insertions(+), 33 deletions(-) diff --git a/polymerist/mdtools/openmmtools/thermo.py b/polymerist/mdtools/openmmtools/thermo.py index ce5ef22..a2615a0 100644 --- a/polymerist/mdtools/openmmtools/thermo.py +++ b/polymerist/mdtools/openmmtools/thermo.py @@ -14,9 +14,9 @@ from openmm.openmm import Force, MonteCarloBarostat from openmm.unit import Quantity, kelvin, atmosphere, picosecond +from ...genutils.decorators.classmod import register_subclasses, register_abstract_class_attrs from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable from ...genutils.fileutils.jsonio.serialize import QuantitySerializer -from ...genutils.decorators.classmod import register_subclasses # PARAMETER CLASSES @@ -38,6 +38,7 @@ def __post_init__(self) -> None: # ABSTRACT BASE FOR CREATING ENSEMBLE-SPECIFIC SIMULATION @dataclass @register_subclasses(key_attr='ensemble') +@register_abstract_class_attrs('ensemble', 'ensemble_name') class EnsembleFactory(ABC): '''Base 
class for implementing interface for generating ensemble-specific simulations''' thermo_params : ThermoParameters @@ -48,18 +49,6 @@ def from_thermo_params(cls, thermo_params : ThermoParameters) -> 'EnsembleFactor return EnsembleFactory.subclass_registry[thermo_params.ensemble](thermo_params) # ENSEMBLE NAMING ATTRIBUTES - @abstractproperty - @classmethod - def ensemble(cls) -> str: # to be implemented for each particular ensemble - '''Specify state variables of ensemble''' - pass - - @abstractproperty - @classmethod - def ensemble_name(cls) -> str: # to be implemented for each particular ensemble - '''Specify name of ensemble''' - pass - @property def desc(self) -> str: '''Verbal description of ensemble''' @@ -99,38 +88,23 @@ def forces(self) -> Optional[Iterable[Force]]: # CONCRETE IMPLEMENTATIONS OF ENSEMBLES @dataclass -class NVESimulationFactory(EnsembleFactory): - thermo_params : ThermoParameters - - ensemble : ClassVar[str] = 'NVE' - ensemble_name : ClassVar[str] = 'microcanonical' - +class NVESimulationFactory(EnsembleFactory, ensemble='NVE', ensemble_name='microcanonical'): def _integrator(self, time_step : Quantity) -> Integrator: - return VerletIntegrator(stepSize=time_step) + return VerletIntegrator(time_step) def _forces(self) -> Optional[Iterable[Force]]: return None @dataclass -class NVTSimulationFactory(EnsembleFactory): # TODO : add implementation support for Andersen and Nose-Hoover thermostats (added to forces instead) - thermo_params : ThermoParameters - - ensemble : ClassVar[str] = 'NVT' - ensemble_name : ClassVar[str] = 'canonical' - +class NVTSimulationFactory(EnsembleFactory, ensemble='NVT', ensemble_name='canonical'): def _integrator(self, time_step : Quantity) -> Integrator: return LangevinMiddleIntegrator(self.thermo_params.temperature, self.thermo_params.friction_coeff, time_step) def _forces(self) -> Optional[Iterable[Force]]: - return None + return None # TODO : add implementation support for Andersen and Nose-Hoover thermostats (added 
to forces instead) @dataclass -class NPTSimulationFactory(EnsembleFactory): - thermo_params : ThermoParameters - - ensemble : ClassVar[str] = 'NPT' - ensemble_name : ClassVar[str] = 'isothermal-isobaric' - +class NPTSimulationFactory(EnsembleFactory, ensemble='NPT', ensemble_name='isothermal-isobaric'): def _integrator(self, time_step : Quantity) -> Integrator: return LangevinMiddleIntegrator(self.thermo_params.temperature, self.thermo_params.friction_coeff, time_step) From 1adc0b4e242d6d3a2bec251327f7d82a7d3c3f61 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 18:50:37 -0700 Subject: [PATCH 114/191] Added typehints for SMILES and SMARTS strings --- polymerist/smileslib/primitives.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/polymerist/smileslib/primitives.py b/polymerist/smileslib/primitives.py index 0134f3c..844cad9 100644 --- a/polymerist/smileslib/primitives.py +++ b/polymerist/smileslib/primitives.py @@ -3,10 +3,24 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' +from typing import TypeAlias + from rdkit import Chem from rdkit.Chem.rdchem import BondType +# VALIDATION +Smiles : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers +Smarts : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers + +def is_valid_SMARTS(smarts : str) -> bool: + '''Check if SMARTS string is valid (according to RDKit)''' + return (Chem.MolFromSmarts(smarts) is not None) + +def is_valid_SMILES(smiles : str) -> bool: + '''Check if SMARTS string is valid (according to RDKit)''' + return (Chem.MolFromSmiles(smiles) is not None) + # BOND PRIMITIVES AND RELATED OBJECTS BOND_PRIMITIVES = '~-=#$:' BOND_PRIMITIVES_FOR_REGEX = r'[~\-=#$:]' # any of the SMARTS bond primitive chars, with a space to differentiate single-bond hyphen for the regex range char @@ -36,13 +50,3 @@ 
bonds_by_order[order] = prim_str rdbonds_by_type[bondtype] = rd_bond rdbonds_by_order[order] = rd_bond - - -# VALIDATION -def is_valid_SMARTS(smarts : str) -> bool: - '''Check if SMARTS string is valid (according to RDKit)''' - return (Chem.MolFromSmarts(smarts) is not None) - -def is_valid_SMILES(smiles : str) -> bool: - '''Check if SMARTS string is valid (according to RDKit)''' - return (Chem.MolFromSmiles(smiles) is not None) \ No newline at end of file From 59199131d28e6c3f247ae3ea74e6f5769964b96e Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 20:18:35 -0700 Subject: [PATCH 115/191] Exposed Smiles and Smarts typehints at subpackage level --- polymerist/smileslib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/smileslib/__init__.py b/polymerist/smileslib/__init__.py index 647f88b..649e161 100644 --- a/polymerist/smileslib/__init__.py +++ b/polymerist/smileslib/__init__.py @@ -3,4 +3,4 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from .primitives import is_valid_SMILES, is_valid_SMARTS \ No newline at end of file +from .primitives import is_valid_SMILES, is_valid_SMARTS, Smiles, Smarts \ No newline at end of file From d5cd8fc2c8acba5cde96ce1054c6014f98a10015 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 20:18:56 -0700 Subject: [PATCH 116/191] Added function for uniquifying strings (which can preserve character order) --- polymerist/genutils/textual/strsearch.py | 35 ++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/textual/strsearch.py b/polymerist/genutils/textual/strsearch.py index 919e797..14e080a 100644 --- a/polymerist/genutils/textual/strsearch.py +++ b/polymerist/genutils/textual/strsearch.py @@ -6,8 +6,39 @@ from typing import Callable, Optional from pathlib import Path -from ..fileutils.extensions import FileTypeError +def uniquify_str(string : str, preserve_order : bool=True) -> str: 
+ ''' + Accepts a string and returns another string containing + only the UNIQUE characters in the original string + + Can specify whether order is important with the "preserve_order" keyword + + Parameters + ---------- + string : str + An arbitrary string one wants the unique characters from + preserve_order : bool, default True + Whether or not to keep the unique characters in the order they are found + For example: + uniquify_str("balaclava", preserve_order=False) -> "bcavl" + uniquify_str("balaclava", preserve_order=True) -> "balcv" + + Returns + ------- + uniquified_str : str + Another string containing only the unique characters in "string" + Order depends on the value of the "preserve_order" parameter + ''' + if not preserve_order: + unique_chars = set(string) + else: + unique_chars = [] + for char in string: + if char not in unique_chars: + unique_chars.append(char) + + return ''.join(unique_chars) def shortest_repeating_substring(string : str) -> str: '''Return the shortest substring such that the passed string can be written as some number of repeats (including 1) of the substring @@ -29,7 +60,7 @@ def filter_text_by_condition(in_text_path : Path, condition : Callable[[str], bo raise PermissionError(f'Attempting to overwrite {in_text_path} with regex filter') # prevent write clash if (out_text_path.suffix != in_text_path.suffix): # prevent file type conversion during transfer - raise FileTypeError(f'Input and output file must have same extension (not {in_text_path.suffix} and {out_text_path.suffix})') + raise ValueError(f'Input and output file must have same extension (not {in_text_path.suffix} and {out_text_path.suffix})') with out_text_path.open('w') as outfile: with in_text_path.open('r') as infile: # readfile is innermost in case error occurs during file read (caught by handler one level up) From 9a6278e0ca76f3cf2b3b4124c2d335a7e19de2fe Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 20:55:45 -0700 Subject: [PATCH 117/191] Updated DOP 
calculation to check for and yield correct number of monomers regardless of block sequence length --- polymerist/polymers/building.py | 46 +++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index e7898c3..8ccd864 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -16,15 +16,18 @@ from pathlib import Path from rdkit import Chem -from .exceptions import MorphologyError +from .exceptions import InsufficientChainLengthError, MorphologyError from .estimation import estimate_chain_len_linear -from ..polymers.monomers.repr import MonomerGroup -from ..polymers.monomers.specification import SANITIZE_AS_KEKULE from ..genutils.decorators.functional import allow_string_paths +from ..genutils.textual.strsearch import uniquify_str + from ..rdutils.bonding.portlib import get_linker_ids from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports + from ..mdtools.openmmtools.serialization import serialize_openmm_pdb +from ..polymers.monomers.repr import MonomerGroup +from ..polymers.monomers.specification import SANITIZE_AS_KEKULE # CONVERSION @@ -67,7 +70,7 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', if not monomers.is_linear: raise MorphologyError('Linear polymer building does not support non-linear monomer input') - if monomers.has_valid_linear_term_orient: + if monomers.has_valid_linear_term_orient: # DEV: consider moving this logic into MonomerGroup term_orient = monomers.term_orient LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') else: @@ -77,14 +80,31 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', } LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') - # 1) ADD MIDDLE MONOMERS TO CHAIN + # 
1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) + n_terminal = len(term_orient) # determine how many terminal monomers are actually present and well-defined + block_size = len(sequence) + + if ((DOP - n_terminal) % block_size) != 0: + raise ValueError(f'Cannot build a(n) {DOP}-monomer chain from any number of {block_size}-monomer blocks and {n_terminal} end groups') + # NOTE: not explicitly forcing n_seq_reps to catch lingering float input / inexact division errors + n_seq_reps = (DOP - n_terminal) // block_size # number of times to repeat the block sequence between end groups to reach the target chain length + if n_seq_reps < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand + raise InsufficientChainLengthError(f'{DOP}-monomer chain has few total monomers to accomodate {n_terminal} end groups AND at least 1 middle monomer sequence') + # TODO: consider adding support for fractional sequence lengths IFF that fraction is a rational number whose denominator divides the sequence length... 
+ # ...for example, could allow 5/2 * 'BACA' to be interpreted as 'BACA|BACA|BA'; 5/3 * 'BACA' would still be invalid though + LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_terminal} terminal monomers = {DOP} total monomers)') + + # 2) ADD MIDDLE MONOMERS TO CHAIN chain = MBPolymer() - for (resname, middle_monomer), sequence_key in zip(monomers.iter_rdmols(term_only=False), sequence): # zip with sequence limits number of middle monomers to length of block sequence + for (resname, middle_monomer), sequence_key in zip( + monomers.iter_rdmols(term_only=False), + uniquify_str(sequence, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence + ): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering middle monomer {resname} (block identifier "{sequence_key}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer) chain.add_monomer(compound=mb_monomer, indices=linker_ids) - # 2) ADD TERMINAL MONOMERS TO CHAIN + # 3) ADD TERMINAL MONOMERS TO CHAIN term_iters = { # need to convert to iterators to allow for generator-like advancement (required for term group selection to behave as expected) resname : iter(rdmol_list) # made necessary by annoying list-bound structure of current substructure spec for resname, rdmol_list in monomers.rdmols(term_only=True).items() @@ -95,16 +115,16 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation - # 3) ASSEMBLE AND RETURN CHAIN - n_atoms = estimate_chain_len_linear(monomers, DOP) - LOGGER.info(f'Assembling linear polymer chain with {DOP} monomers ({n_atoms} atoms)') - 
chain.build(DOP - 2, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + # 4) ASSEMBLE AND RETURN CHAIN + n_atoms_est = estimate_chain_len_linear(monomers, DOP) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + LOGGER.info(f'Assembling linear {DOP}-mer chain (estimated {n_atoms_est} atoms)') + chain.build(n_seq_reps, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) - LOGGER.info(f'Successfully assembled linear polymer chain with {DOP} monomers ({n_atoms} atoms)') + LOGGER.info(f'Successfully assembled linear {DOP}-mer chain (exactly {chain.n_particles} atoms)') if energy_minimize: - LOGGER.info('Energy-minimizing chain to find more stabile conformer') + LOGGER.info('Energy-minimizing chain to find more stable conformer') chain.energy_minimize() LOGGER.info('Energy minimization completed') From c18d394f2368bec1fe37b10a4ca4b1817a557003 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 21:02:43 -0700 Subject: [PATCH 118/191] Established placeholder file + sample fragments for polymer building unit tests --- polymerist/tests/data/peg-pla-pga.json | 38 ++++++++++++++++++++++ polymerist/tests/polymers/test_building.py | 18 ++++++++++ 2 files changed, 56 insertions(+) create mode 100644 polymerist/tests/data/peg-pla-pga.json create mode 100644 polymerist/tests/polymers/test_building.py diff --git a/polymerist/tests/data/peg-pla-pga.json b/polymerist/tests/data/peg-pla-pga.json new file mode 100644 index 0000000..2db06af --- /dev/null +++ b/polymerist/tests/data/peg-pla-pga.json @@ -0,0 +1,38 @@ +{ + "__class__": "MonomerGroup", + "__values__": { + "monomers": { + "PEG-1A": [ + 
"[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]" + ], + "PEG-1B": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]" + ], + "PEG-2": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]" + ], + "PLA-1A": [ + "[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]" + ], + "PLA-1B": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]" + ], + "PLA-2": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]" + ], + "PGA-1A": [ + "[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]" + ], + "PGA-1B": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]" + ], + "PGA-2": [ + "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]" + ] + }, + "term_orient": { + "PEG-1A": "head", + "PEG-1B": "tail" + } + } +} \ No newline at end of file diff --git a/polymerist/tests/polymers/test_building.py b/polymerist/tests/polymers/test_building.py new file mode 100644 index 0000000..799123f --- /dev/null +++ b/polymerist/tests/polymers/test_building.py @@ -0,0 +1,18 @@ +'''Unit tests for `polymers.building` module''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest +from pathlib import Path + +from polymerist.genutils.importutils.pkginspect import get_file_path_within_package +from polymerist.tests import data as testdata + +from polymerist.polymers import building + +@pytest.fixture +def fragments_path() -> Path: + return get_file_path_within_package('peg-pla-pga.json', testdata) + +# Also add separate tests module for 
polymers.estimation From 4cc360d179bd260c836c68709f70c515fbcf5c19 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 21:13:21 -0700 Subject: [PATCH 119/191] Added internal used-monomer-only MonomerGroup which improves accuracy of n_atoms estimate --- polymerist/polymers/building.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 8ccd864..42aaaeb 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -94,7 +94,10 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', # ...for example, could allow 5/2 * 'BACA' to be interpreted as 'BACA|BACA|BA'; 5/3 * 'BACA' would still be invalid though LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_terminal} terminal monomers = {DOP} total monomers)') - # 2) ADD MIDDLE MONOMERS TO CHAIN + # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY + monomers_used = MonomerGroup() # used to track and estimate sized of the monomers being used + + ## 2A) ADD MIDDLE MONOMERS TO CHAIN chain = MBPolymer() for (resname, middle_monomer), sequence_key in zip( monomers.iter_rdmols(term_only=False), @@ -103,8 +106,9 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', LOGGER.info(f'Registering middle monomer {resname} (block identifier "{sequence_key}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer) chain.add_monomer(compound=mb_monomer, indices=linker_ids) + monomers_used.monomers[resname] = monomers.monomers[resname] - # 3) ADD TERMINAL MONOMERS TO CHAIN + ## 2B) ADD TERMINAL MONOMERS TO CHAIN term_iters = { # need to convert to iterators to allow for generator-like advancement (required for term group selection to behave as expected) resname : iter(rdmol_list) # made necessary by annoying list-bound structure of 
current substructure spec for resname, rdmol_list in monomers.rdmols(term_only=True).items() @@ -114,9 +118,10 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', term_monomer = next(term_iters[resname]) mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation + monomers_used.monomers[resname] = monomers.monomers[resname] - # 4) ASSEMBLE AND RETURN CHAIN - n_atoms_est = estimate_chain_len_linear(monomers, DOP) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + # 3) ASSEMBLE AND RETURN CHAIN + n_atoms_est = estimate_chain_len_linear(monomers_used, DOP) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {DOP}-mer chain (estimated {n_atoms_est} atoms)') chain.build(n_seq_reps, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): From 173990cb29d0cbd8a5314dfb8d68288f1ce4e53f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 21:30:15 -0700 Subject: [PATCH 120/191] Deprecated DOP alias for n_monomers property --- polymerist/polymers/monographs.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polymerist/polymers/monographs.py b/polymerist/polymers/monographs.py index 28f6fcd..8c9b591 100644 --- a/polymerist/polymers/monographs.py +++ b/polymerist/polymers/monographs.py @@ -38,7 +38,6 @@ def get_flavor_dict_at_node_index(self, node_idx : int) -> Optional[dict[int, in def num_monomers(self) -> int: '''Number of monomer units represented in the current polymer''' return self.number_of_nodes() - DOP = num_monomers @property def is_unbranched(self) -> bool: From 8eb726790c34852138d7c1e25fa45ffa5c3122ee Mon Sep 17 00:00:00 
2001 From: Timotej Bernat Date: Tue, 3 Dec 2024 21:42:03 -0700 Subject: [PATCH 121/191] Expunged all references to "DOP" in favor of clearer terminology --- polymerist/polymers/building.py | 20 +++++++------- polymerist/polymers/estimation.py | 45 ++++++++++++++++--------------- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 42aaaeb..5105113 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -17,7 +17,7 @@ from rdkit import Chem from .exceptions import InsufficientChainLengthError, MorphologyError -from .estimation import estimate_chain_len_linear +from .estimation import estimate_n_atoms_linear from ..genutils.decorators.functional import allow_string_paths from ..genutils.textual.strsearch import uniquify_str @@ -63,7 +63,7 @@ def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int # LINEAR POLYMER BUILDING -def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', add_Hs : bool=False, energy_minimize : bool=False) -> MBPolymer: +def build_linear_polymer(monomers : MonomerGroup, n_monomers : int, sequence : str='A', add_Hs : bool=False, energy_minimize : bool=False) -> MBPolymer: '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' # 0) VERIFY THAT CHAIN ACTUAL CAN DEFINE LINEAR POLYMER @@ -84,15 +84,15 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', n_terminal = len(term_orient) # determine how many terminal monomers are actually present and well-defined block_size = len(sequence) - if ((DOP - n_terminal) % block_size) != 0: - raise ValueError(f'Cannot build a(n) {DOP}-monomer chain from any number of {block_size}-monomer blocks and {n_terminal} end groups') + if ((n_monomers - n_terminal) % block_size) != 0: + raise ValueError(f'Cannot build a(n) {n_monomers}-monomer chain from any number of {block_size}-monomer blocks and {n_terminal} end groups') # NOTE: not explicitly forcing n_seq_reps to catch lingering float input / inexact division errors - n_seq_reps = (DOP - n_terminal) // block_size # number of times to repeat the block sequence between end groups to reach the target chain length + n_seq_reps = (n_monomers - n_terminal) // block_size # number of times to repeat the block sequence between end groups to reach the target chain length if n_seq_reps < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand - raise InsufficientChainLengthError(f'{DOP}-monomer chain has few total monomers to accomodate {n_terminal} end groups AND at least 1 middle monomer sequence') + raise InsufficientChainLengthError(f'{n_monomers}-monomer chain has few total monomers to accomodate {n_terminal} end groups AND at least 1 middle monomer sequence') # TODO: consider adding support for fractional sequence lengths IFF that fraction is a rational number whose denominator divides the sequence length... 
# ...for example, could allow 5/2 * 'BACA' to be interpreted as 'BACA|BACA|BA'; 5/3 * 'BACA' would still be invalid though - LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_terminal} terminal monomers = {DOP} total monomers)') + LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_terminal} terminal monomers = {n_monomers} total monomers)') # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY monomers_used = MonomerGroup() # used to track and estimate sized of the monomers being used @@ -121,12 +121,12 @@ def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', monomers_used.monomers[resname] = monomers.monomers[resname] # 3) ASSEMBLE AND RETURN CHAIN - n_atoms_est = estimate_chain_len_linear(monomers_used, DOP) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy - LOGGER.info(f'Assembling linear {DOP}-mer chain (estimated {n_atoms_est} atoms)') + n_atoms_est = estimate_n_atoms_linear(monomers_used, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') chain.build(n_seq_reps, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) - LOGGER.info(f'Successfully assembled linear {DOP}-mer chain (exactly {chain.n_particles} atoms)') + LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') if energy_minimize: LOGGER.info('Energy-minimizing chain to find more stable conformer') diff --git 
a/polymerist/polymers/estimation.py b/polymerist/polymers/estimation.py index fa56799..37d2784 100644 --- a/polymerist/polymers/estimation.py +++ b/polymerist/polymers/estimation.py @@ -12,10 +12,10 @@ from ..rdutils.bonding.portlib import get_num_ports -def estimate_chain_len_linear(monomers : MonomerGroup, DOP : int) -> int: +def estimate_n_atoms_linear(monomers : MonomerGroup, n_monomers : int) -> int: '''Given a set of monomers and the desired degree of polymerization, estimate the length of the resulting chain !NOTE! : As-implemented, only works for linear homopolymers and block copolymers with equal an distribution of monomers''' - # TOSELF : omitted logging for now, as it gets repeated on EVERY cycle in when called estimate_DOP_lower + # TOSELF : omitted logging for now, as it gets repeated on EVERY cycle in when called estimate_n_monomers_supremum() num_mono = monomers.n_monomers mono_term = np.zeros(num_mono, dtype=bool) # terminality of each monomer (i.e. whether or not it is a term group) mono_multip = np.zeros(num_mono, dtype=int) # multiplicity of each polymer (i.e. 
how many times is occurs in a chain) @@ -32,27 +32,30 @@ def estimate_chain_len_linear(monomers : MonomerGroup, DOP : int) -> int: num_term = sum(mono_term) num_mid = num_mono - num_term # assumed that all monomers are either terminal or not - mono_multip[~mono_term] = (DOP - num_term) / num_mid # naive assumption that all middle monomers contribute rest of chain equally (for homopolymers, this is always true) + mono_multip[~mono_term] = (n_monomers - num_term) / num_mid # naive assumption that all middle monomers contribute rest of chain equally (for homopolymers, this is always true) N = mono_contrib @ mono_multip # compute dot product to yield final count return N -def estimate_DOP_lower(monomers : MonomerGroup, max_chain_len : int, min_DOP : int=3) -> int: - '''Returns the largest DOP for a set of monomers which yields a chain no longer than the specified chain length''' - base_chain_len = estimate_chain_len_linear(monomers, min_DOP) - if base_chain_len > max_chain_len: # pre-check when optimization is impossible - raise InsufficientChainLengthError(f'Even shortest possible chain (DOP={min_DOP}, N={base_chain_len}) is longer than the specified max length of {max_chain_len} atoms') - - DOP = min_DOP - while estimate_chain_len_linear(monomers, DOP + 1) < max_chain_len: # check if adding 1 more monomer keeps the length below the threshold - DOP += 1 - - return DOP - -def estimate_DOP_upper(monomers : MonomerGroup, min_chain_len : int, min_DOP : int=3) -> int: # NOTE : as currently defined, this also subsumes the case when the estimate and calculated length are exactly equal - '''Returns the smallest DOP for a set of monomers which yields a chain no shorter than the specified chain length''' - return estimate_DOP_lower(monomers, min_chain_len, min_DOP=min_DOP) + 1 # by definition, this is just 1 monomer longer than the lower bound - -estimate_DOP_infimum = estimate_DOP_upper # more descriptive aliases to alleviate confusion (originals kept in for backwards 
compatibility) -estimate_DOP_supremum = estimate_DOP_lower # more descriptive aliases to alleviate confusion (originals kept in for backwards compatibility) \ No newline at end of file +def estimate_n_monomers_infimum(monomers : MonomerGroup, n_atoms_max : int, n_monomers_min : int=3) -> int: + ''' + For a given collection of monomer fragments, returns the largest number of monomers which guarantees that + a polymer chain made up of those monomers will have no more than the specified maximum number of atoms + ''' + n_atoms_base = estimate_n_atoms_linear(monomers, n_monomers_min) + if n_atoms_base > n_atoms_max: # pre-check when optimization is impossible + raise InsufficientChainLengthError(f'Even shortest possible chain ({n_monomers_min} monomers, with {n_atoms_base} atoms) is longer than the specified max length of {n_atoms_max} atoms') + + n_monomers = n_monomers_min + while estimate_n_atoms_linear(monomers, n_monomers + 1) < n_atoms_max: # check if adding 1 more monomer keeps the length below the threshold + n_monomers += 1 + + return n_monomers + +def estimate_n_monomers_supremum(monomers : MonomerGroup, n_atoms_min : int, n_monomers_min : int=3) -> int: # NOTE : as currently defined, this also subsumes the case when the estimate and calculated length are exactly equal + ''' + For a given collection of monomer fragments, returns the smallest number of monomers which guarantees that + a polymer chain made up of those monomers will have no fewer than the specified minimum number of atoms + ''' + return estimate_n_monomers_infimum(monomers, n_atoms_min, n_monomers_min=n_monomers_min) + 1 # by definition, any more monomers than the infimum guarantees the chain will surpass a given number of atoms \ No newline at end of file From 88f8b6b1d1ccd3b438ecbbf593ed6511924ea1b4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 14:33:29 -0700 Subject: [PATCH 122/191] Deprecated filter_text_by_condition() --- polymerist/genutils/textual/strsearch.py | 30 
+----------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/polymerist/genutils/textual/strsearch.py b/polymerist/genutils/textual/strsearch.py index 14e080a..ac52489 100644 --- a/polymerist/genutils/textual/strsearch.py +++ b/polymerist/genutils/textual/strsearch.py @@ -3,9 +3,6 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Callable, Optional -from pathlib import Path - def uniquify_str(string : str, preserve_order : bool=True) -> str: ''' @@ -44,29 +41,4 @@ def shortest_repeating_substring(string : str) -> str: '''Return the shortest substring such that the passed string can be written as some number of repeats (including 1) of the substring Will return the original string if no simpler decomposition exists''' i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) - return string if (i == -1) else string[:i] - -def filter_text_by_condition(in_text_path : Path, condition : Callable[[str], bool], out_text_path : Optional[Path]=None, postfix : str='filtered', inclusive : bool=True, return_filtered_path : bool=False) -> Optional[Path]: - '''Create a copy of a text-based file containing only the lines which match to a given boolean condition - - If no explicit output path is given, will create an output file in the same directory as the source file - with the same name plus "postfix" tacked on. 
Can optionally return the path to the filtered file (else None) - - "Inclusive" kw governs whether to write lines which DO or DON'T meet the condition''' - if out_text_path is None: - out_text_path = in_text_path.with_stem(f'{in_text_path.stem}{"_" if postfix else ""}{postfix}') - - if (out_text_path == in_text_path): - raise PermissionError(f'Attempting to overwrite {in_text_path} with regex filter') # prevent write clash - - if (out_text_path.suffix != in_text_path.suffix): # prevent file type conversion during transfer - raise ValueError(f'Input and output file must have same extension (not {in_text_path.suffix} and {out_text_path.suffix})') - - with out_text_path.open('w') as outfile: - with in_text_path.open('r') as infile: # readfile is innermost in case error occurs during file read (caught by handler one level up) - for line in infile: - if (condition(line) == inclusive): # only write lines if (matching AND inclusive) OR (not matching AND exclusive) - outfile.write(line) - - if return_filtered_path: - return out_text_path \ No newline at end of file + return string if (i == -1) else string[:i] \ No newline at end of file From 98cec8906ed8b06edd563306ca55d07682f6d1a8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 14:35:05 -0700 Subject: [PATCH 123/191] Renamed textual.strsearch to textual.substrings, updated docstring --- polymerist/genutils/textual/{strsearch.py => substrings.py} | 2 +- polymerist/polymers/building.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename polymerist/genutils/textual/{strsearch.py => substrings.py} (94%) diff --git a/polymerist/genutils/textual/strsearch.py b/polymerist/genutils/textual/substrings.py similarity index 94% rename from polymerist/genutils/textual/strsearch.py rename to polymerist/genutils/textual/substrings.py index ac52489..7a43815 100644 --- a/polymerist/genutils/textual/strsearch.py +++ b/polymerist/genutils/textual/substrings.py @@ -1,4 +1,4 @@ -'''For searching and replacing 
through strings and text files''' +'''For identifying and concatenating substrings of other strings with unique properties''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 5105113..9b08067 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -20,7 +20,7 @@ from .estimation import estimate_n_atoms_linear from ..genutils.decorators.functional import allow_string_paths -from ..genutils.textual.strsearch import uniquify_str +from ..genutils.textual.substrings import uniquify_str from ..rdutils.bonding.portlib import get_linker_ids from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports From de314e758ff010816539a51dfa7548518c552e20 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 14:35:46 -0700 Subject: [PATCH 124/191] Implemented function for repeating a string a (possibly fractional) number of times --- polymerist/genutils/textual/substrings.py | 38 ++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index 7a43815..d9b7662 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ -41,4 +41,40 @@ def shortest_repeating_substring(string : str) -> str: '''Return the shortest substring such that the passed string can be written as some number of repeats (including 1) of the substring Will return the original string if no simpler decomposition exists''' i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) - return string if (i == -1) else string[:i] \ No newline at end of file + return string if (i == -1) else string[:i] + +def repeat_string_to_length(string : str, target_length : int) -> str: + ''' + Takes a string and repeats it cyclically 
to produce another string of a given length + The number of times the original string occurs in the new string may be fractional + for example: + >> repeat_string_to_length("CAT", 6) -> "CATCAT" + >> repeat_string_to_length("BACA", 10) -> "BACABACABA" + + Parameters + ---------- + string : str + An arbitrary string to repeat + target_length : int + The length of the final desired string + This does NOT have to be an integer multiple of the length of "string" + E.g. repeat_string_to_length("BACA", 10) -> "BACABACABA" + Nor does it have to be greater than the length of "string" + E.g. repeat_string_to_length("BACA", 3) -> "BAC" + + Returns + ------- + rep_string : str + A new string which has the desired target length and consists of cycles of the initial string + ''' + if not string: + raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string') + return (string*(target_length//len(string) + 1))[:target_length] # repeat to smallest # time + + # Implementation 2) more readable, but slightly slower in benchmark + # whole_reps, fract_reps = divmod(target_length, len(string)) + # return whole_reps*string + string[fract_reps:] + + # Implementation 3) most compact, but introduces itertools dependency + # Interestingly, this yields empty string instead of division-by-zero error w/ empty string as input + # return ''.join(islice(cycle(string), target_length)) \ No newline at end of file From 3685b396f754af6871ee74c61071a8a17a839059 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 15:40:36 -0700 Subject: [PATCH 125/191] Added argument for indicating separator between string repeats --- polymerist/genutils/textual/substrings.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index d9b7662..ea3d8e5 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ 
-43,7 +43,7 @@ def shortest_repeating_substring(string : str) -> str: i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) return string if (i == -1) else string[:i] -def repeat_string_to_length(string : str, target_length : int) -> str: +def repeat_string_to_length(string : str, target_length : int, join_indicator : str='') -> str: ''' Takes a string and repeats it cyclically to produce another string of a given length The number of times the original string occurs in the new string may be fractional @@ -69,12 +69,9 @@ def repeat_string_to_length(string : str, target_length : int) -> str: ''' if not string: raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string') - return (string*(target_length//len(string) + 1))[:target_length] # repeat to smallest # time - # Implementation 2) more readable, but slightly slower in benchmark - # whole_reps, fract_reps = divmod(target_length, len(string)) - # return whole_reps*string + string[fract_reps:] + num_str_reps, num_extra_chars = divmod(target_length, len(string)) + remainder = (string[:num_extra_chars]) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty - # Implementation 3) most compact, but introduces itertools dependency - # Interestingly, this yields empty string instead of division-by-zero error w/ empty string as input - # return ''.join(islice(cycle(string), target_length)) \ No newline at end of file + return join_indicator.join(num_str_reps*(string,) + remainder) # tuples here are ~2 OOM faster than moral equivalent with lists + \ No newline at end of file From 497791e169db0c7cd5a611faaef6f79e4d49f293 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 15:41:06 -0700 Subject: [PATCH 126/191] Renamed uniquify_str() to unique_string() --- polymerist/genutils/textual/substrings.py | 6 +++--- 1 file changed, 3 insertions(+), 3 
deletions(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index ea3d8e5..d6192db 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ -4,7 +4,7 @@ __email__ = 'timotej.bernat@colorado.edu' -def uniquify_str(string : str, preserve_order : bool=True) -> str: +def unique_string(string : str, preserve_order : bool=True) -> str: ''' Accepts a string and returns another string containing only the UNIQUE characters in the origin string @@ -18,8 +18,8 @@ def uniquify_str(string : str, preserve_order : bool=True) -> str: preserve_order : bool, default True Whether or not to keep the unique characters in the order they are found For example: - uniquify_str("balaclava", preserve_order=False) -> "bcavl" - uniquify_str("balaclava", preserve_order=True) -> "balcv" + unique_string("balaclava", preserve_order=False) -> "bcavl" + unique_string("balaclava", preserve_order=True) -> "balcv" Returns ------- From 3c7448e9f728d651bbcb7f6240ba5ff44a325d1a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 15:48:40 -0700 Subject: [PATCH 127/191] Renamed "join_indicator" to "joiner" for brevity --- polymerist/genutils/textual/substrings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index d6192db..924d2aa 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ -43,7 +43,7 @@ def shortest_repeating_substring(string : str) -> str: i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) return string if (i == -1) else string[:i] -def repeat_string_to_length(string : str, target_length : int, join_indicator : str='') -> str: +def repeat_string_to_length(string : str, target_length : int, joiner : str='') -> str: ''' Takes a string and 
repeats it cyclically to produce another string of a given length The number of times the original string occurs in the new string may be fractional @@ -73,5 +73,5 @@ def repeat_string_to_length(string : str, target_length : int, join_indicator : num_str_reps, num_extra_chars = divmod(target_length, len(string)) remainder = (string[:num_extra_chars]) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty - return join_indicator.join(num_str_reps*(string,) + remainder) # tuples here are ~2 OOM faster than moral equivalent with lists + return joiner.join(num_str_reps*(string,) + remainder) # tuples here are ~2 OOM faster than moral equivalent with lists \ No newline at end of file From 13c9db7820c8ebef824a6a90683dd6010845661a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 16:11:37 -0700 Subject: [PATCH 128/191] Fixed bug with parenthesization vs tuplification --- polymerist/genutils/textual/substrings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index 924d2aa..2d9041f 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ -71,7 +71,7 @@ def repeat_string_to_length(string : str, target_length : int, joiner : str='') raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string') num_str_reps, num_extra_chars = divmod(target_length, len(string)) - remainder = (string[:num_extra_chars]) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty + remainder = (string[:num_extra_chars],) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty return joiner.join(num_str_reps*(string,) + remainder) # tuples here are ~2 OOM faster than moral equivalent with lists \ No newline at end of file From 
d3c8be7a2caa979ab6a71c4b9877e6cc0c00a477 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 16:11:47 -0700 Subject: [PATCH 129/191] Wrote unit tests for textual.substrings --- .../tests/genutils/textual/test_substrings.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 polymerist/tests/genutils/textual/test_substrings.py diff --git a/polymerist/tests/genutils/textual/test_substrings.py b/polymerist/tests/genutils/textual/test_substrings.py new file mode 100644 index 0000000..a7ab00a --- /dev/null +++ b/polymerist/tests/genutils/textual/test_substrings.py @@ -0,0 +1,64 @@ +'''Unit tests for `substrings` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest + +from polymerist.genutils.textual.substrings import unique_string, shortest_repeating_substring, repeat_string_to_length + + +@pytest.mark.parametrize('string', ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit']) +def test_unique_str_unordered(string : str) -> None: +    '''Test that unique characters are correctly identified WITHOUT respect to order''' +    assert set(unique_string(string, preserve_order=False)) == set(string) + +@pytest.mark.parametrize('string, expected_output', +    [ +        ('aaaaa', 'a'), +        ('BABAB', 'BA'), +        ('balaclava', 'balcv'), +        ('catamaran', 'catmrn'), +        ('unique', 'uniqe'), # self-reference makes everything better :P +        ('singular', 'singular'), # test string with already-unique characters are unaffected +    ] +) +def test_unique_str_ordered(string : str, expected_output : str) -> None: +    '''Test that unique characters are correctly identified WITH respect to order''' +    assert unique_string(string, preserve_order=True) == expected_output + + +@pytest.mark.parametrize('string, expected_output', +    [ +        ('aaaaa', 'a'), +        ('booboo', 'boo'), +        ('piripiri', 'piri'), +        ('ababab', 'ab'), +        ('bcbabcbabcba', 'bcba'), +        # sequences which do not repeat a whole-number of times +        ('no 
repeats', 'no repeats'), +        ('ababa', 'ababa'), +        ('bonobo', 'bonobo'), +    ] +) +def test_shortest_repeating_substring(string : str, expected_output : str) -> None: +    '''Test that minimal repeating substrings are correctly identified''' +    assert shortest_repeating_substring(string) == expected_output + + +@pytest.mark.parametrize('string, target_length, joiner, expected_output', +    [ +        ('BACA', 10, '', 'BACABACABA'), # expected "standard" use case +        ('BACA', 1, '', 'B'), # test case where target length is shorter than the whole string +        ('BACA', 0, '', ''), # test that no repeats yields the empty string +        ('BACA', 4, '', 'BACA'), # test precisely one repeat without joins +        ('BACA', 10, '|', 'BACA|BACA|BA'), # test joiners +        ('BACA', 4, '|', 'BACA'), # test no joiners are added when exactly one string repeat occurs +        ('BACA', 12, '|', 'BACA|BACA|BACA'), # test no extraneous joiners are included for purely-whole number of repeats +        ('CAT', 5, '', 'CATCA'), # test with triads (and different base string) +        pytest.param('', 7, '', None, marks=pytest.mark.xfail(raises=ValueError, reason='Empty string can\'t be repeated into nonempty string', strict=True)), +    ] +) +def test_repeat_string_to_length(string : str, target_length : int, joiner : str, expected_output : str) -> None: +    '''Test that string repetition to a given length returns the expected string WITH joining characters present''' +    assert repeat_string_to_length(string, target_length=target_length, joiner=joiner) == expected_output \ No newline at end of file From 487ae6a910e7847ffc4dae520ccb02cc6d6da38a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 16:58:00 -0700 Subject: [PATCH 130/191] Delayed monomer linearity check to only be on the monomer fragments selected for building --- polymerist/polymers/building.py | 43 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 
9b08067..1705385 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -20,7 +20,7 @@ from .estimation import estimate_n_atoms_linear from ..genutils.decorators.functional import allow_string_paths -from ..genutils.textual.substrings import uniquify_str +from ..genutils.textual.substrings import unique_string from ..rdutils.bonding.portlib import get_linker_ids from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports @@ -63,13 +63,17 @@ def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int # LINEAR POLYMER BUILDING -def build_linear_polymer(monomers : MonomerGroup, n_monomers : int, sequence : str='A', add_Hs : bool=False, energy_minimize : bool=False) -> MBPolymer: +def build_linear_polymer( + monomers : MonomerGroup, + n_monomers : int, + sequence : str='A', + allow_partial_sequences : bool=True, + add_Hs : bool=False, + energy_minimize : bool=False, + ) -> MBPolymer: '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' - # 0) VERIFY THAT CHAIN ACTUAL CAN DEFINE LINEAR POLYMER - if not monomers.is_linear: - raise MorphologyError('Linear polymer building does not support non-linear monomer input') - + # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED if monomers.has_valid_linear_term_orient: # DEV: consider moving this logic into MonomerGroup term_orient = monomers.term_orient LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') @@ -81,32 +85,32 @@ def build_linear_polymer(monomers : MonomerGroup, n_monomers : int, sequence : s LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - n_terminal = len(term_orient) # determine how many terminal monomers are actually present and well-defined + n_mono_term = len(term_orient) # determine how many terminal monomers are actually present and well-defined + n_mono_middle = n_monomers - n_mono_term # in a linear chain, all monomers are either middle of terminal block_size = len(sequence) - if ((n_monomers - n_terminal) % block_size) != 0: - raise ValueError(f'Cannot build a(n) {n_monomers}-monomer chain from any number of {block_size}-monomer blocks and {n_terminal} end groups') + if (n_mono_middle % block_size) != 0: + raise ValueError(f'Cannot build a(n) {n_monomers}-monomer chain from any number of {block_size}-monomer blocks and {n_mono_term} end groups') # NOTE: not explicitly forcing n_seq_reps to catch lingering float input / inexact division errors - n_seq_reps = (n_monomers - n_terminal) // block_size # number of times to repeat the block sequence between end groups to reach the target chain length + n_seq_reps = n_mono_middle // block_size # number of times 
to repeat the block sequence between end groups to reach the target chain length if n_seq_reps < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand - raise InsufficientChainLengthError(f'{n_monomers}-monomer chain has few total monomers to accomodate {n_terminal} end groups AND at least 1 middle monomer sequence') + raise InsufficientChainLengthError(f'{n_monomers}-monomer chain has few total monomers to accomodate {n_mono_term} end groups AND at least 1 middle monomer sequence') # TODO: consider adding support for fractional sequence lengths IFF that fraction is a rational number whose denominator divides the sequence length... # ...for example, could allow 5/2 * 'BACA' to be interpreted as 'BACA|BACA|BA'; 5/3 * 'BACA' would still be invalid though - LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_terminal} terminal monomers = {n_monomers} total monomers)') + LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_mono_term} terminal monomers = {n_monomers} total monomers)') # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY - monomers_used = MonomerGroup() # used to track and estimate sized of the monomers being used - + monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building ## 2A) ADD MIDDLE MONOMERS TO CHAIN chain = MBPolymer() for (resname, middle_monomer), sequence_key in zip( monomers.iter_rdmols(term_only=False), - uniquify_str(sequence, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence + unique_string(sequence, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence ): # zip with sequence limits number of middle monomers to length of block sequence 
LOGGER.info(f'Registering middle monomer {resname} (block identifier "{sequence_key}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer) chain.add_monomer(compound=mb_monomer, indices=linker_ids) - monomers_used.monomers[resname] = monomers.monomers[resname] + monomers_selected.monomers[resname] = monomers.monomers[resname] ## 2B) ADD TERMINAL MONOMERS TO CHAIN term_iters = { # need to convert to iterators to allow for generator-like advancement (required for term group selection to behave as expected) @@ -118,10 +122,13 @@ def build_linear_polymer(monomers : MonomerGroup, n_monomers : int, sequence : s term_monomer = next(term_iters[resname]) mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation - monomers_used.monomers[resname] = monomers.monomers[resname] + monomers_selected.monomers[resname] = monomers.monomers[resname] # 3) ASSEMBLE AND RETURN CHAIN - n_atoms_est = estimate_n_atoms_linear(monomers_used, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + if not monomers_selected.is_linear: # verify the selected monomers actually define a linear polymer + raise MorphologyError('Linear polymer building does not support non-linear monomer input') + + n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') chain.build(n_seq_reps, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): From ddc20bcac532f76d57db614c8e362d9041d49aa8 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 17:06:22 -0700 Subject: 
[PATCH 131/191] Added range and int typing checks to target_length --- polymerist/genutils/textual/substrings.py | 4 ++++ polymerist/tests/genutils/textual/test_substrings.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py index 2d9041f..83c6f4f 100644 --- a/polymerist/genutils/textual/substrings.py +++ b/polymerist/genutils/textual/substrings.py @@ -69,6 +69,10 @@ def repeat_string_to_length(string : str, target_length : int, joiner : str='') ''' if not string: raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string') + if not isinstance(target_length, int): + raise TypeError(f'Only integer target string lengths are allowed, not non-integer type "{type(target_length).__name__}"') + if target_length < 0: + raise IndexError(f'Cannot generate a string of negative length (requested length of {target_length} character(s))') num_str_reps, num_extra_chars = divmod(target_length, len(string)) remainder = (string[:num_extra_chars],) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty diff --git a/polymerist/tests/genutils/textual/test_substrings.py b/polymerist/tests/genutils/textual/test_substrings.py index a7ab00a..1130812 100644 --- a/polymerist/tests/genutils/textual/test_substrings.py +++ b/polymerist/tests/genutils/textual/test_substrings.py @@ -56,7 +56,9 @@ def test_shortest_repeating_substring(string : str, expected_output : str) -> No ('BACA', 4, '|', 'BACA'), # test no joiners are added when exactly one string repeat occurs ('BACA', 12, '|', 'BACA|BACA|BACA'), # test no extraneous joiners are included for purely-whole number of repeats ('CAT', 5, '', 'CATCA'), # test with triads (and different base string) - pytest.param('', 7, '', None, marks=pytest.mark.xfail(raises=ValueError, reason='Empty string can\'t be repeated into nonempty string', strict=True)), + 
pytest.param('' , 7, '', None, marks=pytest.mark.xfail(raises=ValueError, reason='Empty string can\'t be repeated into nonempty string', strict=True)), + pytest.param('CAT', 4.2, '', None, marks=pytest.mark.xfail(raises=TypeError , reason='Non-integer string length doesn\'t make sense', strict=True)), + pytest.param('CAT', -1, '', None, marks=pytest.mark.xfail(raises=IndexError, reason='Can\'t have string with fewer than 0 characters', strict=True)), ] ) def test_repeat_string_to_length(string : str, target_length : int, joiner : str, expected_output : str) -> None: From 8714eb841a4d8081240571d2e1b46fa7632929c2 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 17:56:28 -0700 Subject: [PATCH 132/191] Added option to register residue names when converting a spec SMARTS fragment into an mBuild Compound --- polymerist/polymers/building.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 1705385..54f119c 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -6,6 +6,8 @@ import logging LOGGER = logging.getLogger(__name__) +from typing import Optional + import warnings with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings warnings.filterwarnings('ignore', category=DeprecationWarning) @@ -31,23 +33,29 @@ # CONVERSION -def mbmol_from_mono_rdmol(rdmol : Chem.Mol) -> tuple[Compound, list[int]]: - '''Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports''' +def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tuple[Compound, list[int]]: + ''' + Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports + If "resname" is provided, will assign that name to the mBuild Compound returned + ''' linker_ids = [i for i in 
get_linker_ids(rdmol)] # record indices of ports - MUST unpack generator for mbuild compatibility # create port-free version of molecule which RDKit can embed without errors prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False) # prot_mol = saturate_ports(rdmol) # TOSELF : custom, port-based saturation methods are not yet ready for deployment - yield issues in RDKit representation under-the-hood Chem.SanitizeMol(prot_mol, sanitizeOps=SANITIZE_AS_KEKULE) # ensure Mol is valid (avoids implicitValence issues) + mb_compound = mb.conversion.from_rdkit(prot_mol) # native from_rdkit() method actually appears to preserve atom ordering + if resname is not None: + mb_compound.name = resname return mb_compound, linker_ids @allow_string_paths -def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, res_repl : dict[str, str]=None) -> None: +def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, resname_repl : dict[str, str]=None) -> None: '''Save an MBuild Compound into an OpenMM-compatible PDB file''' - if res_repl is None: # avoid mutable default - res_repl = {'RES' : 'Pol'} + if resname_repl is None: # avoid mutable default + resname_repl = {'RES' : 'Pol'} traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory @@ -58,7 +66,7 @@ def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int positions=omm_pos, uniquify_atom_ids=True, num_atom_id_digits=num_atom_digits, - resname_repl=res_repl + resname_repl=resname_repl ) @@ -108,7 +116,7 @@ def build_linear_polymer( unique_string(sequence, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence ): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering middle monomer {resname} (block 
identifier "{sequence_key}")') - mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer) + mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) chain.add_monomer(compound=mb_monomer, indices=linker_ids) monomers_selected.monomers[resname] = monomers.monomers[resname] @@ -118,9 +126,9 @@ def build_linear_polymer( for resname, rdmol_list in monomers.rdmols(term_only=True).items() } for resname, head_or_tail in term_orient.items(): - LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') term_monomer = next(term_iters[resname]) - mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer) + LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') + mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation monomers_selected.monomers[resname] = monomers.monomers[resname] From 0fe10e91f4706ca1e57a30ee74c0ececa581745c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 17:57:43 -0700 Subject: [PATCH 133/191] Renamed "resname_repl" to "resname_map" throughout --- polymerist/mdtools/openmmtools/serialization.py | 8 ++++---- polymerist/polymers/building.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/polymerist/mdtools/openmmtools/serialization.py b/polymerist/mdtools/openmmtools/serialization.py index 6bad38a..521e8f0 100644 --- a/polymerist/mdtools/openmmtools/serialization.py +++ b/polymerist/mdtools/openmmtools/serialization.py @@ -120,11 +120,11 @@ def serialize_system(sys_path : Path, system : System) -> None: @allow_string_paths def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : Union[NDArray, list[Vec3]], keep_chain_and_res_ids : bool=True, - uniquify_atom_ids : bool=True, num_atom_id_digits : int=2, resname_repl : 
Optional[dict[str, str]]=None) -> None: + uniquify_atom_ids : bool=True, num_atom_id_digits : int=2, resname_map : Optional[dict[str, str]]=None) -> None: '''Configure and write an Protein DataBank File from an OpenMM Topology and array of positions Provides options to configure atom ID numbering, residue numbering, and residue naming''' - if resname_repl is None: - resname_repl = {} # avoids mutable default + if resname_map is None: + resname_map = {} # avoids mutable default # chain config for chain in topology.chains(): @@ -133,7 +133,7 @@ def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : # residue config for residue in topology.residues(): residue.id = str(residue.id) # avoids TypeError when specifying keepIds during PDB write - repl_res_name = resname_repl.get(residue.name, None) # lookup current residue name to see if a replacement is called for + repl_res_name = resname_map.get(residue.name, None) # lookup current residue name to see if a replacement is called for if repl_res_name is not None: residue.name = repl_res_name diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 54f119c..1b6fc27 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -52,10 +52,10 @@ def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tup return mb_compound, linker_ids @allow_string_paths -def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, resname_repl : dict[str, str]=None) -> None: +def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, resname_map : dict[str, str]=None) -> None: '''Save an MBuild Compound into an OpenMM-compatible PDB file''' - if resname_repl is None: # avoid mutable default - resname_repl = {'RES' : 'Pol'} + if resname_map is None: # avoid mutable default + resname_map = {'RES' : 'Pol'} traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich 
format) omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory @@ -66,7 +66,7 @@ def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int positions=omm_pos, uniquify_atom_ids=True, num_atom_id_digits=num_atom_digits, - resname_repl=resname_repl + resname_map=resname_map ) From 0bd435543ace8f52613a77500a53872f36d6ae62 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 4 Dec 2024 19:04:49 -0700 Subject: [PATCH 134/191] Implemented mBuild Compound to RDKit converter which preserves conformer and residue info --- polymerist/polymers/building.py | 47 +++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 1b6fc27..ba5042b 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -17,6 +17,7 @@ from pathlib import Path from rdkit import Chem +from collections import Counter from .exceptions import InsufficientChainLengthError, MorphologyError from .estimation import estimate_n_atoms_linear @@ -68,6 +69,52 @@ def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int num_atom_id_digits=num_atom_digits, resname_map=resname_map ) + +# TODO: deduplify PDB atom anme and residue numbering code against serialize_openmm_pdb() +def mbmol_to_rdmol( + mbmol : Compound, + uniquify_atom_ids : bool=False, + num_atom_id_digits : int=2, + resname_map : Optional[dict[str, str]]=None + ) -> Chem.Mol: + '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info''' + if resname_map is None: + resname_map = {} + + rdmol = mbmol.to_rdkit() + conformer = Chem.Conformer() + conformer.Set3D(True) + + atom_id : int = 0 + element_counter = Counter() + for resnum, mb_monomer in enumerate(mbmol.children, start=1): + resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no remapping is found, just take first 3 chars 
+ # NOTE: the order of monomers and atoms within those monomers were added in the same order as iterated over here... + #... so the atom indices **SHOULD** be in the correct order (hate that this even might be uncertain) + for mbatom in mb_monomer.particles(): + conformer.SetAtomPosition(atom_id, 10*mbatom.pos.astype(float)) # convert from nm to angstrom + + # set PDB residue info if monomer hierarchy is present + if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide + symbol = mbatom.element.symbol + atom_ser_id = element_counter[symbol] + atom_ser_str = f'{atom_ser_id:0{num_atom_id_digits}d}' if uniquify_atom_ids else ' ' # double space keeps column justification correct when non-unique + atom_name = f' {symbol}{atom_ser_str}' # need a leading space to get column alignment in PDB compliant with spec + + pdb_info = Chem.AtomPDBResidueInfo( + atomName=atom_name, + residueName=resname, + residueNumber=resnum, + chainId='1', + isHeteroAtom=True, + ) + element_counter[symbol] += 1 # only increment AFTER prior value has been assigned to the current atom + rdmol.GetAtomWithIdx(atom_id).SetPDBResidueInfo(pdb_info) + + atom_id += 1 # TODO: this is an awful way of keeping track of atom indices, see if there's a more secure way to do this + conf_id = rdmol.AddConformer(conformer) + + return rdmol # LINEAR POLYMER BUILDING From d94733608cf7cfbe30e583b5506655555c90ac25 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 12:49:47 -0700 Subject: [PATCH 135/191] Deprecated irrelevant custom Exceptions, pared down use of "Error" suffix on Exception names --- polymerist/polymers/estimation.py | 4 ++-- polymerist/polymers/exceptions.py | 34 ++++++++----------------------- 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/polymerist/polymers/estimation.py b/polymerist/polymers/estimation.py index 37d2784..4dc3fa8 100644 --- a/polymerist/polymers/estimation.py +++ 
b/polymerist/polymers/estimation.py @@ -6,7 +6,7 @@ import numpy as np from rdkit import Chem -from .exceptions import InsufficientChainLengthError +from .exceptions import InsufficientChainLength from ..genutils.iteration import iter_len from ..polymers.monomers.repr import MonomerGroup from ..rdutils.bonding.portlib import get_num_ports @@ -45,7 +45,7 @@ def estimate_n_monomers_infimum(monomers : MonomerGroup, n_atoms_max : int, n_mo ''' n_atoms_base = estimate_n_atoms_linear(monomers, n_monomers_min) if n_atoms_base > n_atoms_max: # pre-check when optimization is impossible - raise InsufficientChainLengthError(f'Even shortest possible chain ({n_monomers_min} monomers, with {n_atoms_base} atoms) is longer than the specified max length of {n_atoms_max} atoms') + raise InsufficientChainLength(f'Even shortest possible chain ({n_monomers_min} monomers, with {n_atoms_base} atoms) is longer than the specified max length of {n_atoms_max} atoms') n_monomers = n_monomers_min while estimate_n_atoms_linear(monomers, n_monomers + 1) < n_atoms_max: # check if adding 1 more monomer keeps the length below the threshold diff --git a/polymerist/polymers/exceptions.py b/polymerist/polymers/exceptions.py index e7b5625..dd9e7af 100644 --- a/polymerist/polymers/exceptions.py +++ b/polymerist/polymers/exceptions.py @@ -4,50 +4,34 @@ __email__ = 'timotej.bernat@colorado.edu' -class SubstructMatchFailedError(Exception): - '''Raised when molecule graph isomorphism match does not form a cover''' - pass - -class InsufficientChainLengthError(Exception): +class InsufficientChainLength(Exception): '''Raised when the polymer molecule being built is too short''' pass -class ExcessiveChainLengthError(Exception): +class ExcessiveChainLength(Exception): '''Raised when the polymer molecule being built is too long''' pass +class PartialBlockSequence(Exception): + '''Raised when an non-whole number of copolymer blocks is needed to reach a target chain length (and is not allowed)''' + pass + class 
MorphologyError(Exception): '''Raised when a polymer does not have the morphology (i.e. crosslinking, molecular weight, etc) an application expects''' pass -class AlreadySolvatedError(Exception): +class AlreadySolvated(Exception): '''Raised when attempting to add solvent to a molecule which already has solvent''' pass -class ChargeMismatchError(Exception): +class ChargeMismatch(Exception): '''Raised when attempting to merge two objects which disagree on their charging status''' pass -class NoSimulationsFoundError(Exception): - '''Raised when attempting to load a simulation for a managed molecule when none are present''' - pass - class MissingStructureData(Exception): '''Raised when a managed molecule has no associated structure file (e.g. PDB, SDF, etc.)''' pass -class MissingForceFieldData(Exception): - '''Raised when a forcefield is unspecified for a Simulation or Interchange''' - pass - class MissingMonomerData(Exception): - '''Raised when no monomer information is found for a Polymer''' - pass - -class MissingMonomerDataUncharged(MissingMonomerData): - '''Raised when no monomer information WITHOUT library charges is found for a Polymer''' - pass - -class MissingMonomerDataCharged(MissingMonomerData): - '''Raised when no monomer information WITH library charges is found for a Polymer''' + '''Raised when no monomer fragment information is found for a Polymer''' pass From 7d4b5a147f8b800ea55558e604829d082c7f6ca0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 14:12:12 -0700 Subject: [PATCH 136/191] Implemented support for fractional sequence repeats, with informative Exceptions for invalid inputs --- polymerist/polymers/building.py | 82 ++++++++++++++++++++++----------- 1 file changed, 55 insertions(+), 27 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index ba5042b..094e3d6 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -15,15 +15,17 @@ from mbuild import 
Compound from mbuild.lib.recipes.polymer import Polymer as MBPolymer +from fractions import Fraction from pathlib import Path -from rdkit import Chem from collections import Counter -from .exceptions import InsufficientChainLengthError, MorphologyError +from rdkit import Chem + +from .exceptions import InsufficientChainLength, PartialBlockSequence, MorphologyError from .estimation import estimate_n_atoms_linear from ..genutils.decorators.functional import allow_string_paths -from ..genutils.textual.substrings import unique_string +from ..genutils.textual.substrings import unique_string, repeat_string_to_length from ..rdutils.bonding.portlib import get_linker_ids from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports @@ -53,7 +55,12 @@ def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tup return mb_compound, linker_ids @allow_string_paths -def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, resname_map : dict[str, str]=None) -> None: +def mbmol_to_openmm_pdb( + pdb_path : Path, + mbmol : Compound, + num_atom_digits : int=2, + resname_map : Optional[dict[str, str]]=None, + ) -> None: '''Save an MBuild Compound into an OpenMM-compatible PDB file''' if resname_map is None: # avoid mutable default resname_map = {'RES' : 'Pol'} @@ -116,53 +123,74 @@ def mbmol_to_rdmol( return rdmol - # LINEAR POLYMER BUILDING def build_linear_polymer( monomers : MonomerGroup, n_monomers : int, sequence : str='A', - allow_partial_sequences : bool=True, + allow_partial_sequences : bool=False, add_Hs : bool=False, energy_minimize : bool=False, ) -> MBPolymer: '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' - # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED - if monomers.has_valid_linear_term_orient: # DEV: consider moving this logic into MonomerGroup + # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED - DEV: consider moving this logic into MonomerGroup + if monomers.has_valid_linear_term_orient: term_orient = monomers.term_orient LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') else: term_orient = { resname : orient - for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) # will raise StopIteration if fewer + for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) } LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') - # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - n_mono_term = len(term_orient) # determine how many terminal monomers are actually present and well-defined - n_mono_middle = n_monomers - n_mono_term # in a linear chain, all monomers are either middle of terminal + # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function block_size = len(sequence) + n_mono_term = len(term_orient) # number of terminal monomers are actually present and well-defined + n_mono_middle = n_monomers - n_mono_term # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal + if n_mono_middle < 0: + raise InsufficientChainLength(f'Registered number of terminal monomers exceeds requested chain length ({n_monomers}-mer chain can\'t possibly contain {n_mono_term} 
terminal monomers)') + + n_seq_whole : int # number of full sequence repeats to reach a number of monomers less than or equal to the target + n_symbols_remaining : int # number of any remaining symbols in sequence (i.e. monomers) needed to close the gap to the target (allowed to be 0 if target is a multiple of the sequence length) + n_seq_whole, n_symbols_remaining = divmod(n_mono_middle, block_size) + print(n_seq_whole, n_symbols_remaining) + + if n_symbols_remaining != 0: # a whole number of sequence repeats (including possibly 0) plus some fraction of a full block sequence + if not allow_partial_sequences: + raise PartialBlockSequence( + f'Partial polymer block sequence required to meet target number of monomers ("{sequence[:n_symbols_remaining]}" prefix of sequence "{sequence}"). ' \ + 'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again' + ) + sequence_selected = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='') + n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit) + LOGGER.warning( + f'Target number of monomers is achievable WITH a partial {n_symbols_remaining}/{block_size} sequence repeat; ' \ + f'({n_seq_whole}*{block_size} [{sequence}] + {n_symbols_remaining} [{sequence[:n_symbols_remaining]}]) middle monomers + {n_mono_term} terminal monomers = {n_monomers} total monomers' + ) + else: # for a purely-whole number of block sequence repeats + if n_seq_whole < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand + raise InsufficientChainLength( + f'{n_monomers}-monomer chain cannot accomodate both {n_mono_term} end groups AND at least 1 middle monomer sequence' + ) + sequence_selected = sequence # NOTE: rename here is for clarity, and for consistency with partial sequence case + n_seq_repeats = n_seq_whole + LOGGER.info( + f'Target chain length achievable with {n_seq_repeats} whole block(s) 
of the sequence "{sequence_selected}"; ' \ + f'({n_seq_repeats}*{block_size} [{sequence_selected}]) middle monomers + {n_mono_term} terminal monomers = {n_monomers} total monomers' + ) + print(sequence_selected, n_seq_repeats) - if (n_mono_middle % block_size) != 0: - raise ValueError(f'Cannot build a(n) {n_monomers}-monomer chain from any number of {block_size}-monomer blocks and {n_mono_term} end groups') - # NOTE: not explicitly forcing n_seq_reps to catch lingering float input / inexact division errors - n_seq_reps = n_mono_middle // block_size # number of times to repeat the block sequence between end groups to reach the target chain length - if n_seq_reps < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand - raise InsufficientChainLengthError(f'{n_monomers}-monomer chain has few total monomers to accomodate {n_mono_term} end groups AND at least 1 middle monomer sequence') - # TODO: consider adding support for fractional sequence lengths IFF that fraction is a rational number whose denominator divides the sequence length... 
- # ...for example, could allow 5/2 * 'BACA' to be interpreted as 'BACA|BACA|BA'; 5/3 * 'BACA' would still be invalid though - LOGGER.info(f'Target chain length achievable with {n_seq_reps} block sequence repeat(s) ({n_seq_reps}*{block_size} [{sequence}] middle monomers + {n_mono_term} terminal monomers = {n_monomers} total monomers)') - # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building ## 2A) ADD MIDDLE MONOMERS TO CHAIN chain = MBPolymer() - for (resname, middle_monomer), sequence_key in zip( + for (resname, middle_monomer), symbol in zip( monomers.iter_rdmols(term_only=False), - unique_string(sequence, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence + unique_string(sequence_selected, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence ): # zip with sequence limits number of middle monomers to length of block sequence - LOGGER.info(f'Registering middle monomer {resname} (block identifier "{sequence_key}")') + LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) chain.add_monomer(compound=mb_monomer, indices=linker_ids) monomers_selected.monomers[resname] = monomers.monomers[resname] @@ -173,7 +201,7 @@ def build_linear_polymer( for resname, rdmol_list in monomers.rdmols(term_only=True).items() } for resname, head_or_tail in term_orient.items(): - term_monomer = next(term_iters[resname]) + term_monomer = next(term_iters[resname]) # will raise StopIteration if the terminal monomer in question is empty LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), 
label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation @@ -185,7 +213,7 @@ def build_linear_polymer( n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') - chain.build(n_seq_reps, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + chain.build(n_seq_repeats, sequence=sequence_selected, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') From 3797c64158d776fb9a43eb4c2ce73a7ec1f38d06 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 15:09:21 -0700 Subject: [PATCH 137/191] Added new custom Exception for end-group dominated chains --- polymerist/polymers/exceptions.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polymerist/polymers/exceptions.py b/polymerist/polymers/exceptions.py index dd9e7af..b7502cb 100644 --- a/polymerist/polymers/exceptions.py +++ b/polymerist/polymers/exceptions.py @@ -12,6 +12,9 @@ class ExcessiveChainLength(Exception): '''Raised when the polymer molecule being built is too long''' pass +class EndGroupDominatedChain(Exception): + '''Raised to indicate there are more end groups present in a chain than the monomers could possibly allow''' + class PartialBlockSequence(Exception): '''Raised when an non-whole number of copolymer blocks is needed to reach a target chain length (and is not allowed)''' pass From 8c96e48128a0399d2bc0f8dd5169afb21cef8d32 Mon Sep 17 00:00:00 2001 From: 
Timotej Bernat Date: Thu, 5 Dec 2024 15:43:48 -0700 Subject: [PATCH 138/191] Separated procrustean sequence determination into dedicated helper function --- polymerist/polymers/building.py | 135 ++++++++++++++++++++++---------- 1 file changed, 95 insertions(+), 40 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 094e3d6..4ef796c 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -21,7 +21,7 @@ from rdkit import Chem -from .exceptions import InsufficientChainLength, PartialBlockSequence, MorphologyError +from .exceptions import EndGroupDominatedChain, InsufficientChainLength, PartialBlockSequence, MorphologyError from .estimation import estimate_n_atoms_linear from ..genutils.decorators.functional import allow_string_paths @@ -124,35 +124,62 @@ def mbmol_to_rdmol( return rdmol # LINEAR POLYMER BUILDING -def build_linear_polymer( - monomers : MonomerGroup, - n_monomers : int, - sequence : str='A', - allow_partial_sequences : bool=False, - add_Hs : bool=False, - energy_minimize : bool=False, - ) -> MBPolymer: - '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) - and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' - # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED - DEV: consider moving this logic into MonomerGroup - if monomers.has_valid_linear_term_orient: - term_orient = monomers.term_orient - LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') - else: - term_orient = { - resname : orient - for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) - } - LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') +def procrustean_polymer_sequence_alignment( + sequence : str, + n_monomers_target : int, + n_monomers_terminal : int, + allow_partial_sequences : bool=False + ) -> tuple[str, int]: + ''' + For a given polymer block sequence "S", target linear chain length, and number of terminal monomers, + Returns a sequence "P" and number of repeats "r" which, taken together, satisfy the following: + - The number of monomers in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers + - The symbols in sequence P cycle through the symbols in S, in the order they appear in S + - The number of times S is cycled through in P is always a rational multiple of the length of S + If no satisfiable sequence-count pair can be found, raises an appropriate informative exception + + Named to reflect the fact that the original sequence S will be stretched or truncated to fit the given target sequence length + + Parameters + ---------- + sequence : str + A sequence indicating a periodic ordering of monomers in a linear polymer block (e.g. 
"A", "ABAC", etc) + Each unique symbol in the sequence corresponds to a distinct monomer in the block + n_monomers_target : int + The desired number of monomers (including terminal monomers) in a polymer chain + n_monomers_terminal : int + The number of terminal monomers ("end groups") which are to be included in the chain + in addition to the middle monomers described by "sequence" + allow_partial_sequences : bool, default False + Whether to allow fractional repeats of the original sequence in order to meet the target number of monomers + + For example, to construct a 12-mer chain with 2 end groups from the sequence "BACA", one would require 10 middle monomers + which can only be achieved with 2.5 (10/4) sequence repeats, namely as "BACA|BACA|BA"; - # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function + This behavior may or may not be desired, depending on the use case, and can be controlled by this flag + + Returns + ------- + sequence_procrustean : str + A possibly modified version of the original polymer block sequence + n_seq_repeats : int + The number of times "sequence_procrustean" must be repeated to achieve the target sequence length + + Raises + ------ + EndGroupDominatedChain + The number of terminal monomers exceeds the number of total monomers + PartialBlockSequence + If a partial sequence repeat is required but disallowed (by setting allow_partial_sequences=False) + InsufficientChainLength + If the target chain length cannot accommodate both the end groups and at least one whole repeat of the sequence + ''' block_size = len(sequence) - n_mono_term = len(term_orient) # number of terminal monomers are actually present and well-defined - n_mono_middle = n_monomers - n_mono_term # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal + n_mono_middle = n_monomers_target - n_monomers_terminal # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal if 
n_mono_middle < 0: - raise InsufficientChainLength(f'Registered number of terminal monomers exceeds requested chain length ({n_monomers}-mer chain can\'t possibly contain {n_mono_term} terminal monomers)') + raise EndGroupDominatedChain(f'Registered number of terminal monomers exceeds requested chain length ({n_monomers_target}-mer chain can\'t possibly contain {n_monomers_terminal} terminal monomers)') - n_seq_whole : int # number of full sequence repeats to reach a number of monomers less than or equal to the target + n_seq_whole : int # number of full sequence repeats to reach a number of monomers less than or equal to the target n_symbols_remaining : int # number of any remaining symbols in sequence (i.e. monomers) needed to close the gap to the target (allowed to be 0 if target is a multiple of the sequence length) n_seq_whole, n_symbols_remaining = divmod(n_mono_middle, block_size) print(n_seq_whole, n_symbols_remaining) @@ -163,33 +190,61 @@ def build_linear_polymer( f'Partial polymer block sequence required to meet target number of monomers ("{sequence[:n_symbols_remaining]}" prefix of sequence "{sequence}"). 
' \ 'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again' ) - sequence_selected = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='') + sequence_procrustean = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='') n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit) LOGGER.warning( f'Target number of monomers is achievable WITH a partial {n_symbols_remaining}/{block_size} sequence repeat; ' \ - f'({n_seq_whole}*{block_size} [{sequence}] + {n_symbols_remaining} [{sequence[:n_symbols_remaining]}]) middle monomers + {n_mono_term} terminal monomers = {n_monomers} total monomers' + f'({n_seq_whole}*{block_size} [{sequence}] + {n_symbols_remaining} [{sequence[:n_symbols_remaining]}]) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers} total monomers' ) else: # for a purely-whole number of block sequence repeats - if n_seq_whole < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced by hand + if n_seq_whole < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced my hand raise InsufficientChainLength( - f'{n_monomers}-monomer chain cannot accomodate both {n_mono_term} end groups AND at least 1 middle monomer sequence' + f'{n_monomers_target}-monomer chain cannot accomodate both {n_monomers_terminal} end groups AND at least 1 middle monomer sequence' ) - sequence_selected = sequence # NOTE: rename here is for clarity, and for consistency with partial sequence case + sequence_procrustean = sequence # NOTE: rename here is for clarity, and for consistency with partial sequence case n_seq_repeats = n_seq_whole LOGGER.info( - f'Target chain length achievable with {n_seq_repeats} whole block(s) of the sequence "{sequence_selected}"; ' \ - f'({n_seq_repeats}*{block_size} [{sequence_selected}]) middle monomers + {n_mono_term} terminal monomers = 
{n_monomers} total monomers' + f'Target chain length achievable with {n_seq_repeats} whole block(s) of the sequence "{sequence_procrustean}"; ' \ + f'({n_seq_repeats}*{block_size} [{sequence_procrustean}]) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers_target} total monomers' ) - print(sequence_selected, n_seq_repeats) + return sequence_procrustean, n_seq_repeats + + +def build_linear_polymer( + monomers : MonomerGroup, + n_monomers : int, + sequence : str='A', + allow_partial_sequences : bool=False, + add_Hs : bool=False, + energy_minimize : bool=False, + ) -> MBPolymer: + '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) + and a degree of polymerization (i.e. chain length in number of monomers)) and returns an mbuild Polymer object''' + # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED - DEV: consider moving this logic into MonomerGroup + if monomers.has_valid_linear_term_orient: + term_orient = monomers.term_orient + LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') + else: + term_orient = { + orient : resname + for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) + } + LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') + + # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function + sequence_compliant, n_seq_repeats = procrustean_polymer_sequence_alignment( + sequence, + n_monomers_target=n_monomers, + n_monomers_terminal=len(term_orient), # number of terminal monomers are actually present and well-defined + allow_partial_sequences=allow_partial_sequences, + ) + sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for 
each appearance of a new, unique symbol in the sequence # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building ## 2A) ADD MIDDLE MONOMERS TO CHAIN chain = MBPolymer() - for (resname, middle_monomer), symbol in zip( - monomers.iter_rdmols(term_only=False), - unique_string(sequence_selected, preserve_order=True), # only register a new monomer for each appearance of a new indicator in the sequence - ): # zip with sequence limits number of middle monomers to length of block sequence + for (resname, middle_monomer), symbol in zip(monomers.iter_rdmols(term_only=False), sequence_unique): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) chain.add_monomer(compound=mb_monomer, indices=linker_ids) @@ -200,7 +255,7 @@ def build_linear_polymer( resname : iter(rdmol_list) # made necessary by annoying list-bound structure of current substructure spec for resname, rdmol_list in monomers.rdmols(term_only=True).items() } - for resname, head_or_tail in term_orient.items(): + for head_or_tail, resname in term_orient.items(): term_monomer = next(term_iters[resname]) # will raise StopIteration if the terminal monomer in question is empty LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) @@ -213,7 +268,7 @@ def build_linear_polymer( n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') - chain.build(n_seq_repeats, sequence=sequence_selected, add_hydrogens=add_Hs) # "-2" is to account for 
term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + chain.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') From f7422bb15054918ecaf1c90bd6a5f5d8267e3a27 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 15:44:40 -0700 Subject: [PATCH 139/191] Switched order of residue name and head/tail identifier in MonomerGroup.term_orient (head/tail is now key, and residue name is value) --- polymerist/polymers/monomers/repr.py | 8 ++++---- polymerist/tests/data/peg-pla-pga.json | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index f8ebf62..a3c50ba 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -23,7 +23,7 @@ class MonomerGroup: '''Stores collections of residue-labelled monomer SMARTS''' monomers : ResidueSmarts = field(default_factory=dict) - term_orient : dict[str, str] = field(default_factory=dict) + term_orient : dict[str, str] = field(default_factory=dict) # keys are either "head" or "tail", values are the names of residues in "monomers" @staticmethod def is_terminal(monomer : Mol) -> bool: @@ -87,9 +87,9 @@ def _is_valid(self) -> bool: def has_valid_linear_term_orient(self) -> bool: '''Check whether terminal group orientations are sufficient to define a linear polymer''' return ( - bool(self.term_orient) # check that: 1) the term group orientations are non-empty / non-null... - and all(resname in self.monomers for resname in self.term_orient.keys()) # 2) all term group keys match a present monomer... 
- and sorted(self.term_orient.values()) == ['head', 'tail'] # 3) orientation labels are only "head" and "tail" (in either order) + bool(self.term_orient) # check that: 1) term group orientations are non-empty... + and set(self.term_orient.keys()) == {'head', 'tail'} # 2) ...orientation labels are only "head" and "tail" (in any order)... + and all(resname in self.monomers for resname in self.term_orient.values()) # 3) ... and all term group keys match a present monomer ) # COMPOSITION AND I/O METHODS diff --git a/polymerist/tests/data/peg-pla-pga.json b/polymerist/tests/data/peg-pla-pga.json index 2db06af..a865ed5 100644 --- a/polymerist/tests/data/peg-pla-pga.json +++ b/polymerist/tests/data/peg-pla-pga.json @@ -31,8 +31,8 @@ ] }, "term_orient": { - "PEG-1A": "head", - "PEG_1B": "tail" + "head": "PEG-1A", + "tail": "PEG_1B" } } } \ No newline at end of file From e2be34df7d04bdf4fc254dba01617e917354aab1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 16:33:40 -0700 Subject: [PATCH 140/191] Added __post_init__ check for listification of bare SMARTS and for SMARTS string validity --- polymerist/polymers/monomers/repr.py | 55 ++++++++++++++++++---------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index a3c50ba..b74944c 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -12,19 +12,36 @@ from ...genutils.iteration import iter_len from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable -from ...rdutils.bonding.portlib import get_num_ports +from ...smileslib.primitives import Smarts, is_valid_SMARTS +from ...rdutils.bonding.portlib import get_num_ports -ResidueSmarts : TypeAlias = dict[str, list[str]] # monomer SMARTS strings keyed by residue name # MAIN REPRESENTATION CLASS @make_jsonifiable @dataclass class MonomerGroup: '''Stores collections of residue-labelled monomer SMARTS''' - monomers : 
ResidueSmarts = field(default_factory=dict) + monomers : dict[str, Union[Smarts, list[Smarts]]] = field(default_factory=dict) term_orient : dict[str, str] = field(default_factory=dict) # keys are either "head" or "tail", values are the names of residues in "monomers" + def __post_init__(self) -> None: + # Encase bare SMARTS into lists and check that all monomer SMARTS are valid + for resname, smarts_seq in self.monomers.items(): + if isinstance(smarts_seq, list): + smarts_list = smarts_seq # no modification needed + elif isinstance(smarts_seq, str): + smarts_list = [smarts_seq] # wrap lone SMARTS string in list + self.monomers[resname] = smarts_list # update value internally (doesn't change size of dict) + else: + raise TypeError(f'Values of monomers must be either SMARTS strings or lists of SMARTS strings, not "{type(smarts_seq).__name__}"') + + # check that all SMARTS are valid + for i, smarts in enumerate(smarts_list): # we can now be sure that this is a list of SMARTS strings + if not is_valid_SMARTS(smarts): + raise ValueError(f'Provided invalid monomer SMARTS string for {resname}[{i}]: "{smarts}"') + # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here + @staticmethod def is_terminal(monomer : Mol) -> bool: '''Determine whether or not a monomer is terminal''' @@ -32,7 +49,7 @@ def is_terminal(monomer : Mol) -> bool: # ATTRIBUTE PROPERTIES AND ALIASES @property - def SMARTS(self) -> ResidueSmarts: + def SMARTS(self) -> dict[str, list[Smarts]]: '''Alias of legacy "monomers" attribute''' return self.monomers # alias of legacy name for convenience @@ -69,42 +86,40 @@ def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Mol]]: @property def n_monomers(self) -> int: - '''Returns number of present monomers - Multiple monomers with the same residue name are considered distinct''' + '''Returns number of present monomers; multiple monomers under the same residue name are considered 
distinct''' return iter_len(self.iter_rdmols(term_only=None)) - # VALIDATION AND PROPERTY CHECKS - @property - def _is_valid(self) -> bool: - '''Check that types and formatting are correct''' - for resname, SMARTS_list in self.monomers.items(): - if not (isinstance(resname, str) and isinstance(SMARTS_list, list)): - return False - else: - return True # valid only if none of the SMARTS lists fail - + # END GROUP DETERMINATION @property - def has_valid_linear_term_orient(self) -> bool: + def _has_valid_linear_term_orient(self) -> bool: '''Check whether terminal group orientations are sufficient to define a linear polymer''' return ( bool(self.term_orient) # check that: 1) term group orientations are non-empty... and set(self.term_orient.keys()) == {'head', 'tail'} # 2) ...orientation labels are only "head" and "tail" (in any order)... and all(resname in self.monomers for resname in self.term_orient.values()) # 3) ... and all term group keys match a present monomer ) + + @property + def linear_end_groups(self) -> dict[str, Mol]: + ''' + Returns head-and-tail end groups as defined by term_orient + If term orient is undefined, will + ''' + ... 
- # COMPOSITION AND I/O METHODS + # COMPOSITION METHODS def __add__(self, other : 'MonomerGroup') -> 'MonomerGroup': '''Content-aware method of merging multiple sets of monomer info via the addition operator''' cls = self.__class__ if not isinstance(other, cls): raise NotImplementedError(f'Can only merge {cls.__name__} with another {cls.__name__}, not object of type {type(other)}') - + # TODO: figure out how to handle combination of term group orientation gracefully (ignoring for now) return MonomerGroup(monomers={**self.monomers, **other.monomers}) __radd__ = __add__ # support reverse addition # CHEMICAL INFORMATION - def unique(self, cap_group : Union[str, Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': + def unique(self, cap_group : Union[Smarts, Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': '''Return a MonomerGroup containing only the unique monomers present, given a particular port saturating group (by default just a hydrogen)''' raise NotImplementedError # unique_mono = set() From 56b4144f39ab415e7014404d970f996896c4a4e1 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 16:37:26 -0700 Subject: [PATCH 141/191] Added module-level logger --- polymerist/polymers/monomers/repr.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index b74944c..8ffde83 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -3,6 +3,9 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' +import logging +LOGGER = logging.getLogger(__name__) + from typing import Generator, Optional, TypeAlias, Union from dataclasses import dataclass, field @@ -31,6 +34,7 @@ def __post_init__(self) -> None: if isinstance(smarts_seq, list): smarts_list = smarts_seq # no modification needed elif isinstance(smarts_seq, str): + LOGGER.warning(f'Wrapping bare monomer SMARTS in list to comply with spec (storing as ["{smarts_seq}"])') 
smarts_list = [smarts_seq] # wrap lone SMARTS string in list self.monomers[resname] = smarts_list # update value internally (doesn't change size of dict) else: From d852ed34123214a892fde69131b59e05a8e3d95b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 16:54:13 -0700 Subject: [PATCH 142/191] Added internal method for producing end groups for linear polymer building (with dynamic fallback) --- polymerist/polymers/monomers/repr.py | 42 ++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index 8ffde83..e36ec5d 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -6,12 +6,13 @@ import logging LOGGER = logging.getLogger(__name__) -from typing import Generator, Optional, TypeAlias, Union +from typing import Generator, Optional, Union from dataclasses import dataclass, field +from itertools import cycle from collections import defaultdict + from rdkit import Chem -from rdkit.Chem.rdchem import Mol from ...genutils.iteration import iter_len from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable @@ -32,6 +33,8 @@ def __post_init__(self) -> None: # Encase bare SMARTS into lists and check that all monomer SMARTS are valid for resname, smarts_seq in self.monomers.items(): if isinstance(smarts_seq, list): + if not smarts_seq: + raise IndexError(f'Empty monomer declaration for "{resname}"') # catch case where empty list if provided (would slip through subsequent checks otherwise) smarts_list = smarts_seq # no modification needed elif isinstance(smarts_seq, str): LOGGER.warning(f'Wrapping bare monomer SMARTS in list to comply with spec (storing as ["{smarts_seq}"])') @@ -47,7 +50,7 @@ def __post_init__(self) -> None: # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here @staticmethod - def is_terminal(monomer : Mol) -> bool: + def 
is_terminal(monomer : Chem.Mol) -> bool: '''Determine whether or not a monomer is terminal''' return get_num_ports(monomer) == 1 @@ -57,7 +60,7 @@ def SMARTS(self) -> dict[str, list[Smarts]]: '''Alias of legacy "monomers" attribute''' return self.monomers # alias of legacy name for convenience - def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, Mol], None, None]: + def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, Chem.Mol], None, None]: ''' Generate (residue name, RDKit Mol) pairs of all monomers present Simplifies iteration over internal lists of monomer Mols @@ -73,7 +76,7 @@ def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, M if (term_only is None) or (MonomerGroup.is_terminal(monomer) == term_only): yield (resname, monomer) - def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Mol]]: + def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Chem.Mol]]: ''' Returns dict of RDKit Mol lists keyed by residue name @@ -104,12 +107,33 @@ def _has_valid_linear_term_orient(self) -> bool: ) @property - def linear_end_groups(self) -> dict[str, Mol]: + def linear_end_groups(self) -> dict[str, Chem.Mol]: ''' Returns head-and-tail end groups as defined by term_orient - If term orient is undefined, will + + If term orient is undefined, will automatically take then first + <= 2 terminal groups available to be the end groups ''' - ... 
+ if self._has_valid_linear_term_orient: + LOGGER.info(f'Using user-defined terminal group orientation {self.term_orient}') + monomer_iters = { + resname : cycle(smarts_list) + for resname, smarts_list in self.rdmols(term_only=True).items() + } # cycle handles degenerate end group case correctly + + return { + head_or_tail : next(monomer_iters[resname]) + for head_or_tail, resname in self.term_orient.items() + } + else: + term_orient_auto : dict[str, Smarts] = {} + end_groups_auto : dict[str, Chem.Mol] = {} + for head_or_tail, (resname, rdmol) in zip(['head', 'tail'], self.iter_rdmols(term_only=True)): # zip will bottom out early if fewer than 2 terminal monomers are present + term_orient_auto[head_or_tail] = resname + end_groups_auto[head_or_tail] = rdmol + LOGGER.warning(f'No valid terminal monomer orientations defined; auto-assigned orientations "{term_orient_auto}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') + + return end_groups_auto # COMPOSITION METHODS def __add__(self, other : 'MonomerGroup') -> 'MonomerGroup': @@ -123,7 +147,7 @@ def __add__(self, other : 'MonomerGroup') -> 'MonomerGroup': __radd__ = __add__ # support reverse addition # CHEMICAL INFORMATION - def unique(self, cap_group : Union[Smarts, Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': + def unique(self, cap_group : Union[Smarts, Chem.Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': '''Return a MonomerGroup containing only the unique monomers present, given a particular port saturating group (by default just a hydrogen)''' raise NotImplementedError # unique_mono = set() From 9903dd1b689f6849f6d72a9e8369202b034ca077 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 16:58:00 -0700 Subject: [PATCH 143/191] Changed MonomerGroup.linear_end_groups from property to vanilla method to emphasize that calculation being done is non-trivial --- polymerist/polymers/monomers/repr.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index e36ec5d..fa98e76 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -106,7 +106,6 @@ def _has_valid_linear_term_orient(self) -> bool: and all(resname in self.monomers for resname in self.term_orient.values()) # 3) ... and all term group keys match a present monomer ) - @property def linear_end_groups(self) -> dict[str, Chem.Mol]: ''' Returns head-and-tail end groups as defined by term_orient From 70ee78797955cad6e1ace1c8d06030e031d0b0f4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 17:16:35 -0700 Subject: [PATCH 144/191] Deprecated _has_valid_linear_term_orient, included residue name in linear_end_groups() output --- polymerist/polymers/monomers/repr.py | 29 +++++++++++++--------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index fa98e76..174bd80 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -96,24 +96,21 @@ def n_monomers(self) -> int: '''Returns number of present monomers; multiple monomers under the same residue name are considered distinct''' return iter_len(self.iter_rdmols(term_only=None)) - # END GROUP DETERMINATION - @property - def _has_valid_linear_term_orient(self) -> bool: - '''Check whether terminal group orientations are sufficient to define a linear polymer''' - return ( - bool(self.term_orient) # check that: 1) term group orientations are non-empty... - and set(self.term_orient.keys()) == {'head', 'tail'} # 2) ...orientation labels are only "head" and "tail" (in any order)... - and all(resname in self.monomers for resname in self.term_orient.values()) # 3) ... 
and all term group keys match a present monomer - ) - - def linear_end_groups(self) -> dict[str, Chem.Mol]: + # END GROUP DETERMINATION + def linear_end_groups(self) -> dict[str, tuple[str, Chem.Mol]]: ''' - Returns head-and-tail end groups as defined by term_orient + Returns head-and-tail end group residue names and Mol objects as defined by term_orient If term orient is undefined, will automatically take then first <= 2 terminal groups available to be the end groups + + Returns + ------- + end_groups : dict[str, tuple[str, Chem.Mol]] + A dict whose keys are any of {'head', 'tail'} and whose + values are 2-tuples of residue names and Mols for the corresponding monomer ''' - if self._has_valid_linear_term_orient: + if self.term_orient and set(self.term_orient.keys()) == {'head', 'tail'}: LOGGER.info(f'Using user-defined terminal group orientation {self.term_orient}') monomer_iters = { resname : cycle(smarts_list) @@ -121,15 +118,15 @@ def linear_end_groups(self) -> dict[str, Chem.Mol]: } # cycle handles degenerate end group case correctly return { - head_or_tail : next(monomer_iters[resname]) + head_or_tail : (resname, next(monomer_iters[resname])) # will raise KeyError if any of the resnames are not present for head_or_tail, resname in self.term_orient.items() } else: term_orient_auto : dict[str, Smarts] = {} end_groups_auto : dict[str, Chem.Mol] = {} for head_or_tail, (resname, rdmol) in zip(['head', 'tail'], self.iter_rdmols(term_only=True)): # zip will bottom out early if fewer than 2 terminal monomers are present - term_orient_auto[head_or_tail] = resname - end_groups_auto[head_or_tail] = rdmol + term_orient_auto[head_or_tail] = resname # populate purely for logging + end_groups_auto[head_or_tail] = (resname, rdmol) LOGGER.warning(f'No valid terminal monomer orientations defined; auto-assigned orientations "{term_orient_auto}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') return end_groups_auto From df12d48774be007b781c3e5a7de9adcfc68d84e5 Mon 
Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 17:49:56 -0700 Subject: [PATCH 145/191] Deferred end group determination to internal implemenation in MonomerGroup --- polymerist/polymers/building.py | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 4ef796c..279c427 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -172,7 +172,7 @@ def procrustean_polymer_sequence_alignment( PartialBlockSequence If a partial sequence repeat is required but disallowed (by setting allow_partial_sequences=False) InsufficientChainLength - If + If the target number of monomers results in no middle monomers being included (i.e. neither full NOR partial sequence repeats) ''' block_size = len(sequence) n_mono_middle = n_monomers_target - n_monomers_terminal # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal @@ -209,7 +209,6 @@ def procrustean_polymer_sequence_alignment( ) return sequence_procrustean, n_seq_repeats - def build_linear_polymer( monomers : MonomerGroup, n_monomers : int, @@ -220,30 +219,21 @@ def build_linear_polymer( ) -> MBPolymer: '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' - # 0) DETERMINE THE ORIENTATION AND NUMBER OF TERMINAL MONOMERS, SUPPLYING THIS IF AN INVALID DEFINITION IS PROVIDED - DEV: consider moving this logic into MonomerGroup - if monomers.has_valid_linear_term_orient: - term_orient = monomers.term_orient - LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') - else: - term_orient = { - orient : resname - for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) - } - LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') - # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function + end_groups = monomers.linear_end_groups() # cache end groups so they dont need to be recalculated when registering end groups sequence_compliant, n_seq_repeats = procrustean_polymer_sequence_alignment( sequence, n_monomers_target=n_monomers, - n_monomers_terminal=len(term_orient), # number of terminal monomers are actually present and well-defined + n_monomers_terminal=len(end_groups), # number of terminal monomers are actually present and well-defined allow_partial_sequences=allow_partial_sequences, ) sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for each appearance of a new, unique symbol in the sequence # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY + chain = MBPolymer() monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building + ## 2A) ADD MIDDLE MONOMERS TO CHAIN - chain = MBPolymer() for (resname, middle_monomer), symbol in zip(monomers.iter_rdmols(term_only=False), sequence_unique): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering 
middle monomer {resname} (block identifier "{symbol}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) @@ -251,12 +241,7 @@ def build_linear_polymer( monomers_selected.monomers[resname] = monomers.monomers[resname] ## 2B) ADD TERMINAL MONOMERS TO CHAIN - term_iters = { # need to convert to iterators to allow for generator-like advancement (required for term group selection to behave as expected) - resname : iter(rdmol_list) # made necessary by annoying list-bound structure of current substructure spec - for resname, rdmol_list in monomers.rdmols(term_only=True).items() - } - for head_or_tail, resname in term_orient.items(): - term_monomer = next(term_iters[resname]) # will raise StopIteration if the terminal monomer in question is empty + for head_or_tail, (resname, term_monomer) in end_groups.items(): LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation @@ -268,11 +253,13 @@ def build_linear_polymer( n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') + chain.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) for atom in chain.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') + # 4) OPTIONALLY, PERFORM FINAL UFF ENERGY MINIMIZATION if energy_minimize: 
LOGGER.info('Energy-minimizing chain to find more stable conformer') chain.energy_minimize() From f1e6925d52f298b1e0c1d59e10d02ba45362d274 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 20:33:24 -0700 Subject: [PATCH 146/191] Enhanced logging of sequence breakdown, unified logging between whole and partial cases --- polymerist/polymers/building.py | 39 ++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py index 279c427..6d0cc94 100644 --- a/polymerist/polymers/building.py +++ b/polymerist/polymers/building.py @@ -174,6 +174,7 @@ def procrustean_polymer_sequence_alignment( InsufficientChainLength If the target number of monomers results in no middle monomers being included (i.e. neither full NOR partial sequence repeats) ''' + # Evaluate sizes of missing components from given values block_size = len(sequence) n_mono_middle = n_monomers_target - n_monomers_terminal # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal if n_mono_middle < 0: @@ -182,8 +183,8 @@ def procrustean_polymer_sequence_alignment( n_seq_whole : int # number of full sequence repeats to reach a number of monomers less than or equal to the target n_symbols_remaining : int # number of any remaining symbols in sequence (i.e. 
monomers) needed to close the gap to the target (allowed to be 0 if target is a multiple of the sequence length) n_seq_whole, n_symbols_remaining = divmod(n_mono_middle, block_size) - print(n_seq_whole, n_symbols_remaining) - + + # Break down into cases by whether or not a whole number of sequence repeats is possible if n_symbols_remaining != 0: # a whole number of sequence repeats (including possibly 0) plus some fraction of a full block sequence if not allow_partial_sequences: raise PartialBlockSequence( @@ -192,10 +193,6 @@ def procrustean_polymer_sequence_alignment( ) sequence_procrustean = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='') n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit) - LOGGER.warning( - f'Target number of monomers is achievable WITH a partial {n_symbols_remaining}/{block_size} sequence repeat; ' \ - f'({n_seq_whole}*{block_size} [{sequence}] + {n_symbols_remaining} [{sequence[:n_symbols_remaining]}]) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers} total monomers' - ) else: # for a purely-whole number of block sequence repeats if n_seq_whole < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced my hand raise InsufficientChainLength( @@ -203,10 +200,32 @@ def procrustean_polymer_sequence_alignment( ) sequence_procrustean = sequence # NOTE: rename here is for clarity, and for consistency with partial sequence case n_seq_repeats = n_seq_whole - LOGGER.info( - f'Target chain length achievable with {n_seq_repeats} whole block(s) of the sequence "{sequence_procrustean}"; ' \ - f'({n_seq_repeats}*{block_size} [{sequence_procrustean}]) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers_target} total monomers' - ) + + # Generate descriptive log message to summarize sequence modifications + ## Determine info present for whole and partial sections + desc_seq_counts_parts = [] + 
desc_seq_order_middle = [] + + if n_seq_whole != 0: ## Whole sequence strings + desc_seq_counts_parts.append(f'{n_seq_whole} whole {block_size}-sequence repeats') + desc_seq_order_middle.append(f'{n_seq_whole}*[{sequence}]') + + if n_symbols_remaining != 0: ## Partial sequence strings + desc_seq_counts_parts.append(f'a partial {n_symbols_remaining}/{block_size} sequence repeat') + desc_seq_order_middle.append(f'[{sequence[:n_symbols_remaining]}]') + + ## Finalizing sequence counts descriptor parts + tally_str = f'({n_seq_whole}*{block_size} + {n_symbols_remaining}) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers_target} total monomers)' + if len(desc_seq_counts_parts) == 2: + desc_seq_counts_parts.insert(1, ' and ') # include conjunction if a mixed (i.e. both whole and fractional) solution was found + + ## Finalizing sequence order descriptor parts + desc_seq_order_parts = ['[END-GROUP]']*n_monomers_terminal # abut with correct amount of end group indicators + desc_seq_order_parts[1:-1] = desc_seq_order_middle # insert middle sections for whole and partial sequences + + ## putting everything together + LOGGER.info(f'Target chain length achievable with {"".join(desc_seq_counts_parts)};\n Namely, polymer will be sequenced as {" + ".join(desc_seq_order_parts)}, yielding {tally_str}') + return sequence_procrustean, n_seq_repeats def build_linear_polymer( From dc053afab32b03864c8a79e8e34beeec098eac3c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 21:44:41 -0700 Subject: [PATCH 147/191] Added custom Exception for missing package dependency which reduces error message boilerplate --- .../genutils/importutils/dependencies.py | 20 ++++++++++++++++++- polymerist/mdtools/openfftools/__init__.py | 13 ++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/polymerist/genutils/importutils/dependencies.py b/polymerist/genutils/importutils/dependencies.py index 4787afe..79fa48a 100644 --- 
a/polymerist/genutils/importutils/dependencies.py +++ b/polymerist/genutils/importutils/dependencies.py @@ -3,7 +3,7 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Callable, ParamSpec, TypeVar +from typing import Callable, Optional, ParamSpec, TypeVar Params = ParamSpec('Params') ReturnType = TypeVar('ReturnType') @@ -14,6 +14,24 @@ from functools import wraps +class MissingPrerequisitePackage(Exception): + '''Raised when a package dependency cannot be found and the user should be alerted with install instructions''' + def __init__(self, + importing_package_name : str, + use_case : str, + install_link : str, + dependency_name : str, + dependency_name_formal : Optional[str]=None + ): + if dependency_name_formal is None: + dependency_name_formal = dependency_name + + message = f''' + {use_case.capitalize()} require(s) {dependency_name_formal}, which was not found in the current environment + Please install `{dependency_name}` by following the installation instructions at {install_link}; then try importing from "{importing_package_name}" again''' + + super().__init__(message) + def module_installed(module_name : str) -> bool: ''' Check whether a module of the given name is present on the system diff --git a/polymerist/mdtools/openfftools/__init__.py b/polymerist/mdtools/openfftools/__init__.py index 8f1ab65..f0c5f59 100644 --- a/polymerist/mdtools/openfftools/__init__.py +++ b/polymerist/mdtools/openfftools/__init__.py @@ -4,13 +4,14 @@ __email__ = 'timotej.bernat@colorado.edu' # Subpackage-wide precheck to see if OpenFF is even usable in the first place -from ...genutils.importutils.dependencies import modules_installed +from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage if not modules_installed('openff', 'openff.toolkit'): - raise ModuleNotFoundError( - f''' - OpenFF packages which are required to utilitize {__name__} not found in current environment - Please follow 
installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import - ''' + raise MissingPrerequisitePackage( + importing_package_name=__spec__.name, + use_case='OpenFF addons', + install_link='https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html', + dependency_name='openff-toolkit', + dependency_name_formal='the OpenFF software stack', ) # Import of toplevel OpenFF object registries From d4f636100e20a60af76019a43bb57a87d8e52b88 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 21:44:49 -0700 Subject: [PATCH 148/191] Deleted superfluous imports --- polymerist/polymers/estimation.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/polymerist/polymers/estimation.py b/polymerist/polymers/estimation.py index 4dc3fa8..b075888 100644 --- a/polymerist/polymers/estimation.py +++ b/polymerist/polymers/estimation.py @@ -4,10 +4,8 @@ __email__ = 'timotej.bernat@colorado.edu' import numpy as np -from rdkit import Chem from .exceptions import InsufficientChainLength -from ..genutils.iteration import iter_len from ..polymers.monomers.repr import MonomerGroup from ..rdutils.bonding.portlib import get_num_ports From 88182e7961febc618359c7f578dd67b1cdccaf13 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Thu, 5 Dec 2024 21:48:23 -0700 Subject: [PATCH 149/191] Converted polymers.building into a package, split up functionality among child modules --- polymerist/polymers/building.py | 287 --------------------- polymerist/polymers/building/__init__.py | 18 ++ polymerist/polymers/building/linear.py | 80 ++++++ polymerist/polymers/building/mbconvert.py | 126 +++++++++ polymerist/polymers/building/sequencing.py | 115 +++++++++ 5 files changed, 339 insertions(+), 287 deletions(-) delete mode 100644 polymerist/polymers/building.py create mode 100644 polymerist/polymers/building/__init__.py create mode 100644 polymerist/polymers/building/linear.py create mode 100644 
polymerist/polymers/building/mbconvert.py create mode 100644 polymerist/polymers/building/sequencing.py diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py deleted file mode 100644 index 6d0cc94..0000000 --- a/polymerist/polymers/building.py +++ /dev/null @@ -1,287 +0,0 @@ -'''Utilities for building new polymer structures; currently limited to linear polymers and PDB save format''' - -__author__ = 'Timotej Bernat' -__email__ = 'timotej.bernat@colorado.edu' - -import logging -LOGGER = logging.getLogger(__name__) - -from typing import Optional - -import warnings -with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings - warnings.filterwarnings('ignore', category=DeprecationWarning) - import mbuild as mb - from mbuild import Compound - from mbuild.lib.recipes.polymer import Polymer as MBPolymer - -from fractions import Fraction -from pathlib import Path -from collections import Counter - -from rdkit import Chem - -from .exceptions import EndGroupDominatedChain, InsufficientChainLength, PartialBlockSequence, MorphologyError -from .estimation import estimate_n_atoms_linear - -from ..genutils.decorators.functional import allow_string_paths -from ..genutils.textual.substrings import unique_string, repeat_string_to_length - -from ..rdutils.bonding.portlib import get_linker_ids -from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports - -from ..mdtools.openmmtools.serialization import serialize_openmm_pdb -from ..polymers.monomers.repr import MonomerGroup -from ..polymers.monomers.specification import SANITIZE_AS_KEKULE - - -# CONVERSION -def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tuple[Compound, list[int]]: - ''' - Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports - If "resname" is provided, will assign that name to the mBuild Compound returned - ''' - linker_ids = [i for 
i in get_linker_ids(rdmol)] # record indices of ports - MUST unpack generator for mbuild compatibility - - # create port-free version of molecule which RDKit can embed without errors - prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False) - # prot_mol = saturate_ports(rdmol) # TOSELF : custom, port-based saturation methods are not yet ready for deployment - yield issues in RDKit representation under-the-hood - Chem.SanitizeMol(prot_mol, sanitizeOps=SANITIZE_AS_KEKULE) # ensure Mol is valid (avoids implicitValence issues) - - mb_compound = mb.conversion.from_rdkit(prot_mol) # native from_rdkit() method actually appears to preserve atom ordering - if resname is not None: - mb_compound.name = resname - - return mb_compound, linker_ids - -@allow_string_paths -def mbmol_to_openmm_pdb( - pdb_path : Path, - mbmol : Compound, - num_atom_digits : int=2, - resname_map : Optional[dict[str, str]]=None, - ) -> None: - '''Save an MBuild Compound into an OpenMM-compatible PDB file''' - if resname_map is None: # avoid mutable default - resname_map = {'RES' : 'Pol'} - - traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) - omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory - - serialize_openmm_pdb( - pdb_path, - topology=omm_top, - positions=omm_pos, - uniquify_atom_ids=True, - num_atom_id_digits=num_atom_digits, - resname_map=resname_map - ) - -# TODO: deduplify PDB atom anme and residue numbering code against serialize_openmm_pdb() -def mbmol_to_rdmol( - mbmol : Compound, - uniquify_atom_ids : bool=False, - num_atom_id_digits : int=2, - resname_map : Optional[dict[str, str]]=None - ) -> Chem.Mol: - '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info''' - if resname_map is None: - resname_map = {} - - rdmol = mbmol.to_rdkit() - conformer = Chem.Conformer() - conformer.Set3D(True) - - atom_id : int = 0 - element_counter = 
Counter() - for resnum, mb_monomer in enumerate(mbmol.children, start=1): - resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no remapping is found, just take first 3 chars - # NOTE: the order of monomers and atoms within those monomers were added in the same order as iterated over here... - #... so the atom indices **SHOULD** be in the correct order (hate that this even might be uncertain) - for mbatom in mb_monomer.particles(): - conformer.SetAtomPosition(atom_id, 10*mbatom.pos.astype(float)) # conveert from nm to angstrom - - # set PDB residue info if monomer hierarchy is present - if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide - symbol = mbatom.element.symbol - atom_ser_id = element_counter[symbol] - atom_ser_str = f'{atom_ser_id:0{num_atom_id_digits}d}' if uniquify_atom_ids else ' ' # double space keeps column justification correct when non-unique - atom_name = f' {symbol}{atom_ser_str}' # need a leading space to get column alignment in PDB compliant with spec - - pdb_info = Chem.AtomPDBResidueInfo( - atomName=atom_name, - residueName=resname, - residueNumber=resnum, - chainId='1', - isHeteroAtom=True, - ) - element_counter[symbol] += 1 # only increment AFTER prior value has been assigned to the current atom - rdmol.GetAtomWithIdx(atom_id).SetPDBResidueInfo(pdb_info) - - atom_id += 1 # TODO: this is an awful waay of keeping track of atom indices, see if there's a more secure way to do this - conf_id = rdmol.AddConformer(conformer) - - return rdmol - -# LINEAR POLYMER BUILDING -def procrustean_polymer_sequence_alignment( - sequence : str, - n_monomers_target : int, - n_monomers_terminal : int, - allow_partial_sequences : bool=False - ) -> tuple[str, int]: - ''' - For a given polymer block sequence "S", target linear chain length, and number of terminal monomers, - Returns a sequence "P" and number of repeats "r" which, taken together, satisfy the following: - - The number 
of monomers in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers - - The symbols in sequence P cycle through the symbols in S, in the order they appear in S - - The number of times S is cycles through in P is always a rational multiple of the length of S - If no satisfiable sequence-count pair can be found, raises an appropriate informative exception - - Named to reflect the fact that the original sequence S will be stretched or truncated to fit the given target sequence length - - Parameters - ---------- - sequence : str - A sequence indicating a periodic ordering of monomers in a linear polymer block (e.g. "A", "ABAC", etc) - Each unique symbol in the sequence corresponds to a distinct monomer in the block - n_monomers_target : int - The desired number of monomers (including terminal monomers) in a polymer chain - n_monomers_terminal : int - The number of terminal monomers ("end groups") which are to be included in the chain - in addition to the middle monomers described by "sequence" - allow_partial_sequences : bool, default False - Whether to allow fractional repeats of the original sequence in order to meet the target number of monomers - - For example, to construct a 12-mer chain with 2 end groups from the sequence "BACA", one would require 10 middle monomers - which can only be achieved with 2.5 (10/4) sequence repeats, namely as "BACA|BACA|BA"; - - This behavior may or may not be desired, depending on the use case, and can be controlled by this flag - - Returns - ------- - sequence_procrustean : str - A possibly modified version of the original polymer block sequence - n_seq_repeats : int - The number of times "sequence_procrustean" must be repeated to achieve the target sequence length - - Raises - ------ - End GroupDominatedChain - The number of terminal monomers exceed the number of total monomers - PartialBlockSequence - If a partial sequence repeat is required but disallowed (by setting 
allow_partial_sequences=False) - InsufficientChainLength - If the target number of monomers results in no middle monomers being included (i.e. neither full NOR partial sequence repeats) - ''' - # Evaluate sizes of missing components from given values - block_size = len(sequence) - n_mono_middle = n_monomers_target - n_monomers_terminal # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal - if n_mono_middle < 0: - raise EndGroupDominatedChain(f'Registered number of terminal monomers exceeds requested chain length ({n_monomers_target}-mer chain can\'t possibly contain {n_monomers_terminal} terminal monomers)') - - n_seq_whole : int # number of full sequence repeats to reach a number of monomers less than or equal to the target - n_symbols_remaining : int # number of any remaining symbols in sequence (i.e. monomers) needed to close the gap to the target (allowed to be 0 if target is a multiple of the sequence length) - n_seq_whole, n_symbols_remaining = divmod(n_mono_middle, block_size) - - # Break down into cases by whether or not a whole number of sequence repeats is possible - if n_symbols_remaining != 0: # a whole number of sequence repeats (including possibly 0) plus some fraction of a full block sequence - if not allow_partial_sequences: - raise PartialBlockSequence( - f'Partial polymer block sequence required to meet target number of monomers ("{sequence[:n_symbols_remaining]}" prefix of sequence "{sequence}"). 
' \ - 'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again' - ) - sequence_procrustean = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='') - n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit) - else: # for a purely-whole number of block sequence repeats - if n_seq_whole < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced my hand - raise InsufficientChainLength( - f'{n_monomers_target}-monomer chain cannot accomodate both {n_monomers_terminal} end groups AND at least 1 middle monomer sequence' - ) - sequence_procrustean = sequence # NOTE: rename here is for clarity, and for consistency with partial sequence case - n_seq_repeats = n_seq_whole - - # Generate descriptive log message to summarize sequence modifications - ## Determine info present for whole and partial sections - desc_seq_counts_parts = [] - desc_seq_order_middle = [] - - if n_seq_whole != 0: ## Whole sequence strings - desc_seq_counts_parts.append(f'{n_seq_whole} whole {block_size}-sequence repeats') - desc_seq_order_middle.append(f'{n_seq_whole}*[{sequence}]') - - if n_symbols_remaining != 0: ## Partial sequence strings - desc_seq_counts_parts.append(f'a partial {n_symbols_remaining}/{block_size} sequence repeat') - desc_seq_order_middle.append(f'[{sequence[:n_symbols_remaining]}]') - - ## Finalizing sequence counts descriptor parts - tally_str = f'({n_seq_whole}*{block_size} + {n_symbols_remaining}) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers_target} total monomers)' - if len(desc_seq_counts_parts) == 2: - desc_seq_counts_parts.insert(1, ' and ') # include conjunction if a mixed (i.e. 
both whole and fractional) solution was found - - ## Finalizing sequence order descriptor parts - desc_seq_order_parts = ['[END-GROUP]']*n_monomers_terminal # abut with correct amount of end group indicators - desc_seq_order_parts[1:-1] = desc_seq_order_middle # insert middle sections for whole and partial sequences - - ## putting everything together - LOGGER.info(f'Target chain length achievable with {"".join(desc_seq_counts_parts)};\n Namely, polymer will be sequenced as {" + ".join(desc_seq_order_parts)}, yielding {tally_str}') - - return sequence_procrustean, n_seq_repeats - -def build_linear_polymer( - monomers : MonomerGroup, - n_monomers : int, - sequence : str='A', - allow_partial_sequences : bool=False, - add_Hs : bool=False, - energy_minimize : bool=False, - ) -> MBPolymer: - '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) - and a degree of polymerization (i.e. chain length in number of monomers)) and returns an mbuild Polymer object''' - # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function - end_groups = monomers.linear_end_groups() # cache end groups so they dont need to be recalculated when registering end groups - sequence_compliant, n_seq_repeats = procrustean_polymer_sequence_alignment( - sequence, - n_monomers_target=n_monomers, - n_monomers_terminal=len(end_groups), # number of terminal monomers are actually present and well-defined - allow_partial_sequences=allow_partial_sequences, - ) - sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for each appearance of a new, unique symbol in the sequence - - # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY - chain = MBPolymer() - monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building - - ## 2A) ADD MIDDLE MONOMERS TO CHAIN - for (resname, middle_monomer), 
symbol in zip(monomers.iter_rdmols(term_only=False), sequence_unique): # zip with sequence limits number of middle monomers to length of block sequence - LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') - mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) - chain.add_monomer(compound=mb_monomer, indices=linker_ids) - monomers_selected.monomers[resname] = monomers.monomers[resname] - - ## 2B) ADD TERMINAL MONOMERS TO CHAIN - for head_or_tail, (resname, term_monomer) in end_groups.items(): - LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') - mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) - chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation - monomers_selected.monomers[resname] = monomers.monomers[resname] - - # 3) ASSEMBLE AND RETURN CHAIN - if not monomers_selected.is_linear: # verify the selected monomers actually define a linear polymer - raise MorphologyError('Linear polymer building does not support non-linear monomer input') - - n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy - LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') - - chain.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) - for atom in chain.particles(): - atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) - LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') - - # 4) OPTIONALLY, PERFORM FINAL UFF ENERGY MINIMIZATION - if energy_minimize: - LOGGER.info('Energy-minimizing chain to 
find more stable conformer') - chain.energy_minimize() - LOGGER.info('Energy minimization completed') - - return chain \ No newline at end of file diff --git a/polymerist/polymers/building/__init__.py b/polymerist/polymers/building/__init__.py new file mode 100644 index 0000000..f2b636a --- /dev/null +++ b/polymerist/polymers/building/__init__.py @@ -0,0 +1,18 @@ +''' +Tools for building polymer conformers out of monomer SMARTS fragments +Currently restricted to building linear homopolymers and periodic block copolymers +''' + +from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage + +if not modules_installed('mbuild'): + MissingPrerequisitePackage( + importing_package_name=__spec__.name, + use_case='Polymer building', + install_link='https://mbuild.mosdef.org/en/stable/getting_started/installation/installation.html', + dependency_name='mbuild', + dependency_name_formal='mBuild', + ) + +from .linear import build_linear_polymer +from .mbconvert import mbmol_to_openmm_pdb, mbmol_from_mono_rdmol, mbmol_to_rdmol \ No newline at end of file diff --git a/polymerist/polymers/building/linear.py b/polymerist/polymers/building/linear.py new file mode 100644 index 0000000..aa726da --- /dev/null +++ b/polymerist/polymers/building/linear.py @@ -0,0 +1,80 @@ +'''For generating linear polymer structure from monomer, sequence, and chain length information''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import logging +LOGGER = logging.getLogger(__name__) + +import warnings +with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings + warnings.filterwarnings('ignore', category=DeprecationWarning) + import mbuild as mb + from mbuild import Compound + from mbuild.lib.recipes.polymer import Polymer as MBPolymer + +from .mbconvert import mbmol_from_mono_rdmol +from .sequencing import procrustean_polymer_sequence_alignment +from ..exceptions import MorphologyError 
+from ..monomers.repr import MonomerGroup +from ..estimation import estimate_n_atoms_linear +from ...genutils.textual.substrings import unique_string + + +def build_linear_polymer( + monomers : MonomerGroup, + n_monomers : int, + sequence : str='A', + allow_partial_sequences : bool=False, + add_Hs : bool=False, + energy_minimize : bool=False, + ) -> MBPolymer: + '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) + and a degree of polymerization (i.e. chain length in number of monomers)) and returns an mbuild Polymer object''' + # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function + end_groups = monomers.linear_end_groups() # cache end groups so they dont need to be recalculated when registering end groups + sequence_compliant, n_seq_repeats = procrustean_polymer_sequence_alignment( + sequence, + n_monomers_target=n_monomers, + n_monomers_terminal=len(end_groups), # number of terminal monomers are actually present and well-defined + allow_partial_sequences=allow_partial_sequences, + ) + sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for each appearance of a new, unique symbol in the sequence + + # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY + chain = MBPolymer() + monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building + + ## 2A) ADD MIDDLE MONOMERS TO CHAIN + for (resname, middle_monomer), symbol in zip(monomers.iter_rdmols(term_only=False), sequence_unique): # zip with sequence limits number of middle monomers to length of block sequence + LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') + mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) + chain.add_monomer(compound=mb_monomer, indices=linker_ids) + monomers_selected.monomers[resname] = 
monomers.monomers[resname] + + ## 2B) ADD TERMINAL MONOMERS TO CHAIN + for head_or_tail, (resname, term_monomer) in end_groups.items(): + LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') + mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) + chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation + monomers_selected.monomers[resname] = monomers.monomers[resname] + + # 3) ASSEMBLE AND RETURN CHAIN + if not monomers_selected.is_linear: # verify the selected monomers actually define a linear polymer + raise MorphologyError('Linear polymer building does not support non-linear monomer input') + + n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') + + chain.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + for atom in chain.particles(): + atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) + LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') + + # 4) OPTIONALLY, PERFORM FINAL UFF ENERGY MINIMIZATION + if energy_minimize: + LOGGER.info('Energy-minimizing chain to find more stable conformer') + chain.energy_minimize() + LOGGER.info('Energy minimization completed') + + return chain \ No newline at end of file diff --git a/polymerist/polymers/building/mbconvert.py b/polymerist/polymers/building/mbconvert.py new file mode 100644 index 0000000..7fc3075 --- /dev/null +++ b/polymerist/polymers/building/mbconvert.py @@ -0,0 +1,126 @@ +''' +Enhanced conversions to and from mbuild 
# Conversion between mBuild Compounds and other chemical representations
def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tuple[Compound, list[int]]:
    '''
    Convert a monomer-spec-compliant RDKit Mol into an mBuild Compound,
    also returning the indices of the Mol's port ("linker") atoms

    Parameters
    ----------
    rdmol : Chem.Mol
        A monomer-spec-compliant RDKit Mol whose port atoms mark attachment points
    resname : Optional[str], default None
        If provided, this name is assigned to the returned mBuild Compound

    Returns
    -------
    mb_compound : Compound
        mBuild representation of the (port-hydrogenated) molecule
    linker_ids : list[int]
        Indices of the port atoms in the original Mol
    '''
    linker_ids = list(get_linker_ids(rdmol)) # record indices of ports - MUST materialize generator for mbuild compatibility

    # create port-free version of molecule which RDKit can embed without errors
    prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False)
    # prot_mol = saturate_ports(rdmol) # TOSELF : custom, port-based saturation methods are not yet ready for deployment - yield issues in RDKit representation under-the-hood
    Chem.SanitizeMol(prot_mol, sanitizeOps=SANITIZE_AS_KEKULE) # ensure Mol is valid (avoids implicitValence issues)

    mb_compound = from_rdkit(prot_mol) # native from_rdkit() method appears to preserve atom ordering
    if resname is not None:
        mb_compound.name = resname

    return mb_compound, linker_ids

@allow_string_paths
def mbmol_to_openmm_pdb(
        pdb_path : Path,
        mbmol : Compound,
        num_atom_digits : int=2,
        resname_map : Optional[dict[str, str]]=None,
    ) -> None:
    '''
    Save an mBuild Compound into an OpenMM-compatible PDB file

    Parameters
    ----------
    pdb_path : Path
        Destination path of the PDB file to write
    mbmol : Compound
        The mBuild Compound to serialize
    num_atom_digits : int, default 2
        Number of digits to use when uniquifying atom IDs
    resname_map : Optional[dict[str, str]], default None
        Mapping from existing residue names to replacement names;
        defaults to {'RES' : 'Pol'} when omitted
    '''
    if resname_map is None: # avoid mutable default argument
        resname_map = {'RES' : 'Pol'}

    traj = mbmol.to_trajectory() # first convert to MDTraj representation (a much more info-rich format)
    omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory

    serialize_openmm_pdb(
        pdb_path,
        topology=omm_top,
        positions=omm_pos,
        uniquify_atom_ids=True,
        num_atom_id_digits=num_atom_digits,
        resname_map=resname_map,
    )

# TODO: deduplicate PDB atom name and residue numbering code against serialize_openmm_pdb()
def mbmol_to_rdmol(
        mbmol : Compound,
        uniquify_atom_ids : bool=False,
        num_atom_id_digits : int=2,
        resname_map : Optional[dict[str, str]]=None,
    ) -> Chem.Mol:
    '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info'''
    if resname_map is None: # avoid mutable default argument
        resname_map = {}

    rdmol = mbmol.to_rdkit()
    conformer = Chem.Conformer()
    conformer.Set3D(True)

    atom_id : int = 0
    element_counter = Counter() # per-element serial counter for unique atom IDs
    for resnum, mb_monomer in enumerate(mbmol.children, start=1):
        resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no remapping is found, just take first 3 chars
        # NOTE: monomers and the atoms within them were added in the same order as iterated over here...
        # ...so the atom indices SHOULD be in the correct order (hate that this even might be uncertain)
        for mbatom in mb_monomer.particles():
            conformer.SetAtomPosition(atom_id, 10*mbatom.pos.astype(float)) # convert from nm to angstrom

            # set PDB residue info if monomer hierarchy is present
            if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide
                symbol = mbatom.element.symbol
                atom_ser_id = element_counter[symbol]
                atom_ser_str = f'{atom_ser_id:0{num_atom_id_digits}d}' if uniquify_atom_ids else '  ' # double space keeps column justification correct when non-unique
                atom_name = f' {symbol}{atom_ser_str}' # need a leading space to get column alignment in PDB compliant with spec

                pdb_info = Chem.AtomPDBResidueInfo(
                    atomName=atom_name,
                    residueName=resname,
                    residueNumber=resnum,
                    chainId='1',
                    isHeteroAtom=True,
                )
                element_counter[symbol] += 1 # only increment AFTER prior value has been assigned to the current atom
                rdmol.GetAtomWithIdx(atom_id).SetPDBResidueInfo(pdb_info)

            atom_id += 1 # TODO: find a more robust way of tracking atom indices
    conf_id = rdmol.AddConformer(conformer)

    return rdmol
def procrustean_polymer_sequence_alignment(
        sequence : str,
        n_monomers_target : int,
        n_monomers_terminal : int,
        allow_partial_sequences : bool=False,
    ) -> tuple[str, int]:
    '''
    For a given polymer block sequence "S", target linear chain length, and number of terminal monomers,
    returns a sequence "P" and number of repeats "r" which, taken together, satisfy the following:
    - The number of monomers in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers
    - The symbols in sequence P cycle through the symbols in S, in the order they appear in S
    - The number of times S is cycled through in P is always a rational multiple of the length of S
    If no satisfiable sequence-count pair can be found, raises an appropriate informative exception

    Named to reflect the fact that the original sequence S will be stretched or truncated to fit the given target sequence length

    Parameters
    ----------
    sequence : str
        A sequence indicating a periodic ordering of monomers in a linear polymer block (e.g. "A", "ABAC", etc)
        Each unique symbol in the sequence corresponds to a distinct monomer in the block
    n_monomers_target : int
        The desired number of monomers (including terminal monomers) in a polymer chain
    n_monomers_terminal : int
        The number of terminal monomers ("end groups") which are to be included in the chain
        in addition to the middle monomers described by "sequence"
    allow_partial_sequences : bool, default False
        Whether to allow fractional repeats of the original sequence in order to meet the target number of monomers

        For example, to construct a 12-mer chain with 2 end groups from the sequence "BACA", one would require 10 middle monomers,
        which can only be achieved with 2.5 (10/4) sequence repeats, namely as "BACA|BACA|BA";
        this behavior may or may not be desired, depending on the use case, and can be controlled by this flag

    Returns
    -------
    sequence_procrustean : str
        A possibly-modified version of the original polymer block sequence
    n_seq_repeats : int
        The number of times "sequence_procrustean" must be repeated to achieve the target sequence length

    Raises
    ------
    ValueError
        If the provided sequence is empty
    EndGroupDominatedChain
        If the number of terminal monomers exceeds the number of total monomers
    PartialBlockSequence
        If a partial sequence repeat is required but disallowed (by setting allow_partial_sequences=False)
    InsufficientChainLength
        If the target number of monomers results in no middle monomers being included (i.e. neither full NOR partial sequence repeats)
    '''
    # Guard against empty sequence, which would otherwise surface as an opaque ZeroDivisionError in divmod() below
    if not sequence:
        raise ValueError('Must provide a non-empty sequence to yield a valid (co)polymer sequence')

    # Evaluate sizes of missing components from given values
    block_size = len(sequence)
    n_mono_middle = n_monomers_target - n_monomers_terminal # in a linear chain, all monomers are either middle or terminal
    if n_mono_middle < 0:
        raise EndGroupDominatedChain(
            f'Registered number of terminal monomers exceeds requested chain length '
            f'({n_monomers_target}-mer chain can\'t possibly contain {n_monomers_terminal} terminal monomers)'
        )

    # number of full sequence repeats, and count of leftover symbols needed to close the gap to the target (0 iff target is a multiple of the sequence length)
    n_seq_whole, n_symbols_remaining = divmod(n_mono_middle, block_size)

    # Break down into cases by whether or not a whole number of sequence repeats is possible
    if n_symbols_remaining != 0: # a whole number of sequence repeats (possibly 0) plus some fraction of a full block sequence
        if not allow_partial_sequences:
            raise PartialBlockSequence(
                f'Partial polymer block sequence required to meet target number of monomers ("{sequence[:n_symbols_remaining]}" prefix of sequence "{sequence}"). '
                'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again'
            )
        sequence_procrustean = repeat_string_to_length(sequence, target_length=n_mono_middle, joiner='')
        n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit)
    else: # for a purely-whole number of block sequence repeats
        if n_seq_whole < 1: # NOTE: would be < 0 to allow dimers, but mBuild requires at least one middle monomer
            raise InsufficientChainLength(
                f'{n_monomers_target}-monomer chain cannot accommodate both {n_monomers_terminal} end groups AND at least 1 middle monomer sequence'
            )
        sequence_procrustean = sequence # rename is for clarity and consistency with the partial-sequence case
        n_seq_repeats = n_seq_whole

    # Generate descriptive log message to summarize sequence modifications
    desc_seq_counts_parts = []
    desc_seq_order_middle = []
    if n_seq_whole != 0: # whole sequence strings
        desc_seq_counts_parts.append(f'{n_seq_whole} whole {block_size}-sequence repeats')
        desc_seq_order_middle.append(f'{n_seq_whole}*[{sequence}]')
    if n_symbols_remaining != 0: # partial sequence strings
        desc_seq_counts_parts.append(f'a partial {n_symbols_remaining}/{block_size} sequence repeat')
        desc_seq_order_middle.append(f'[{sequence[:n_symbols_remaining]}]')

    ## finalize sequence counts descriptor parts
    tally_str = f'({n_seq_whole}*{block_size} + {n_symbols_remaining}) middle monomers + {n_monomers_terminal} terminal monomers = {n_monomers_target} total monomers)'
    if len(desc_seq_counts_parts) == 2: # include conjunction if a mixed (i.e. both whole and fractional) solution was found
        desc_seq_counts_parts.insert(1, ' and ')

    ## finalize sequence order descriptor parts
    desc_seq_order_parts = ['[END-GROUP]']*n_monomers_terminal # abut with correct number of end group indicators
    desc_seq_order_parts[1:-1] = desc_seq_order_middle # insert middle sections for whole and partial sequences

    LOGGER.info(f'Target chain length achievable with {"".join(desc_seq_counts_parts)};\n Namely, polymer will be sequenced as {" + ".join(desc_seq_order_parts)}, yielding {tally_str}')

    return sequence_procrustean, n_seq_repeats
# CHAIN LENGTH AND SHAPE ERRORS
class InsufficientChainLength(Exception):
    '''Raised when the polymer molecule being built is too short'''
    pass

class ExcessiveChainLength(Exception):
    '''Raised when the polymer molecule being built is too long''' # NOTE(review): original docstring not visible in diff context - confirm wording
    pass

class EndGroupDominatedChain(Exception):
    '''Raised to indicate there are more end groups present in a chain than the monomers could possibly allow'''
    pass

class MorphologyError(Exception):
    '''Raised when a polymer does not have the morphology (i.e. crosslinking, molecular weight, etc) an application expects'''
    pass

# COPOLYMER SEQUENCING ERRORS
class EmptyBlockSequence(Exception):
    '''Raised when a trivial copolymer block sequence (i.e. the empty string "") is provided when none is expected'''
    pass

class PartialBlockSequence(Exception):
    '''Raised when a non-whole number of copolymer blocks is needed to reach a target chain length (and is not allowed)'''
    pass

# POLYMERIZATION MISINFORMATION ERRORS
class AlreadySolvated(Exception):
    '''Raised when attempting to add solvent to a molecule which already has solvent'''
    pass
procrustean_polymer_sequence_alignment( If the target number of monomers results in no middle monomers being included (i.e. neither full NOR partial sequence repeats) ''' # Evaluate sizes of missing components from given values + if not sequence: + raise EmptyBlockSequence('Must provide non-empty sequence kernel to yield a valid (co)polymer sequence') + block_size = len(sequence) n_mono_middle = n_monomers_target - n_monomers_terminal # number of terminal monomers needed to reach target; in a linear chain, all monomers are either middle or terminal if n_mono_middle < 0: From cb2587a1c050519beff92b37261977b049309706 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 14:38:27 -0700 Subject: [PATCH 154/191] Expanded PROCRUSTEAN sequencing algorithm into dedicated dataclass --- polymerist/polymers/building/linear.py | 24 +- polymerist/polymers/building/sequencing.py | 245 ++++++++++++++------- 2 files changed, 179 insertions(+), 90 deletions(-) diff --git a/polymerist/polymers/building/linear.py b/polymerist/polymers/building/linear.py index aa726da..62e90a6 100644 --- a/polymerist/polymers/building/linear.py +++ b/polymerist/polymers/building/linear.py @@ -14,7 +14,7 @@ from mbuild.lib.recipes.polymer import Polymer as MBPolymer from .mbconvert import mbmol_from_mono_rdmol -from .sequencing import procrustean_polymer_sequence_alignment +from .sequencing import LinearCopolymerSequencer from ..exceptions import MorphologyError from ..monomers.repr import MonomerGroup from ..estimation import estimate_n_atoms_linear @@ -25,6 +25,7 @@ def build_linear_polymer( monomers : MonomerGroup, n_monomers : int, sequence : str='A', + minimize_sequence : bool=True, allow_partial_sequences : bool=False, add_Hs : bool=False, energy_minimize : bool=False, @@ -33,11 +34,20 @@ def build_linear_polymer( and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function end_groups = monomers.linear_end_groups() # cache end groups so they dont need to be recalculated when registering end groups - sequence_compliant, n_seq_repeats = procrustean_polymer_sequence_alignment( - sequence, - n_monomers_target=n_monomers, - n_monomers_terminal=len(end_groups), # number of terminal monomers are actually present and well-defined - allow_partial_sequences=allow_partial_sequences, + end_group_names = [resname for (resname, _) in end_groups.values()] + + sequencer = LinearCopolymerSequencer( + sequence_kernel=sequence, + n_repeat_units=n_monomers, + n_repeat_units_terminal=len(end_groups) + ) + if minimize_sequence: + sequencer.reduce() # identify minimal subsequences + + sequence_compliant, n_seq_repeats = sequencer.procrustean_alignment(allow_partial_sequences=allow_partial_sequences) + LOGGER.info( + f'Target chain length achievable with {sequencer.describe_tally()}, ' \ + f'namely with the sequence {sequencer.describe_order(end_group_names=end_group_names)}' ) sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for each appearance of a new, unique symbol in the sequence @@ -46,7 +56,7 @@ def build_linear_polymer( monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building ## 2A) ADD MIDDLE MONOMERS TO CHAIN - for (resname, middle_monomer), symbol in zip(monomers.iter_rdmols(term_only=False), sequence_unique): # zip with sequence limits number of middle monomers to length of block sequence + for symbol, (resname, middle_monomer) in zip(sequence_unique, monomers.iter_rdmols(term_only=False)): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering middle monomer {resname} 
@dataclass
class LinearCopolymerSequencer:
    '''
    For encapsulating information about the sequence of repeat units in a periodic, linear copolymer
    Also covers, as trivial special cases, homopolymers and alternating copolymers

    Parameters
    ----------
    sequence_kernel : str
        A sequence indicating a periodic ordering of monomers in a linear polymer block (e.g. "A", "ABAC", etc)
        Each unique symbol in the sequence corresponds to a distinct monomer in the block
    n_repeat_units : int
        The desired total number of repeat units (including terminal monomers) in a polymer chain
    n_repeat_units_terminal : int, default 0
        The number of terminal repeat units ("end groups") which are to be included in the chain
        in addition to the middle units described by "sequence_kernel"

    Raises
    ------
    EmptyBlockSequence
        The sequence provided is empty (can't be used to define a nonzero-length chain)
    EndGroupDominatedChain
        The number of terminal monomers exceeds the number of total monomers
    '''
    # sequence_kernel: the periodic unit-ordering string; n_repeat_units: total chain length in units;
    # n_repeat_units_terminal: how many of those units are end groups
    sequence_kernel : str
    n_repeat_units : int
    n_repeat_units_terminal : int = 0

    # Attribute checks and modifications
    def __post_init__(self) -> None:
        if not self.sequence_kernel:
            raise EmptyBlockSequence('Must provide non-empty sequence kernel to yield a valid (co)polymer sequence')

        if self.n_repeat_units_middle < 0:
            raise EndGroupDominatedChain(
                f'Number of terminal monomers exceeds requested chain length; ({self.n_repeat_units}-mer '
                f'chain can\'t possibly contain {self.n_repeat_units_terminal} terminal monomers)'
            )

    def copy(self) -> 'LinearCopolymerSequencer':
        '''Returns another equivalent instance of the current sequence info more efficiently than a complete deepcopy'''
        return self.__class__(**asdict(self))

    def reduce(self) -> None:
        '''
        Determines if there is a shorter repeating subsequence making up the current sequence kernel
        If there is, adjusts the sequence kernel to that minimal sequence; does nothing otherwise

        Reduction is idempotent, and guarantees that the smallest possible kernel is used when sequencing
        '''
        minimal_subsequence = shortest_repeating_substring(self.sequence_kernel)
        kernel_period = self.block_size // len(minimal_subsequence) # account for any periodic shortening WITHIN the kernel

        if kernel_period == 1:
            LOGGER.info(f'Sequence kernel "{self.sequence_kernel}" is already fully reduced; no changes made')
            return
        else:
            LOGGER.info(
                f'Sequence kernel "{self.sequence_kernel}" can be further decomposed as {kernel_period}*"{minimal_subsequence}"; '
                f'Setting kernel to minimal subsequence "{minimal_subsequence}"'
            )
            self.sequence_kernel = minimal_subsequence

    def reduced(self) -> 'LinearCopolymerSequencer':
        '''Return a sequence-reduced version of the current sequence info'''
        clone = self.copy()
        clone.reduce()

        return clone

    # Properties derived from sequence kernel and target chain lengths
    @property
    def n_repeat_units_middle(self) -> int:
        '''Number of middle (i.e. non-terminal) repeat units'''
        return self.n_repeat_units - self.n_repeat_units_terminal

    # Whole sequence periods
    @property
    def block_size(self) -> int:
        '''Number of repeat units in one whole iteration of the kernel block'''
        return len(self.sequence_kernel)
    period = block_size # alias

    @property
    def n_full_periods(self) -> int:
        '''
        Largest number of complete repetitions of the sequence kernel which, when taken
        together, contain no more repeat units than the specified number of middle units
        '''
        return self.n_repeat_units_middle // self.block_size

    # Partial sequence residues
    @property
    def n_residual_repeat_units(self) -> int:
        '''
        Difference between the number of middle repeat units and the number of units which
        would occur in the maximal number of full periods of the kernel

        By construction, is no greater than the block size, and is
        identically zero exactly when a whole number of kernel repeats fits
        '''
        return self.n_repeat_units_middle % self.block_size
    n_residual_symbols = n_res = n_residual_repeat_units # aliases

    @property
    def has_residual(self) -> bool:
        '''
        Whether or not the target number of middle repeat units
        can be attained by a whole number of kernel repeats
        '''
        return bool(self.n_residual_repeat_units)

    @property
    def sequence_residual(self) -> str:
        '''Partial repeat of the kernel sequence needed to attain the specified number of middle units'''
        return self.sequence_kernel[:self.n_residual_repeat_units]
    residual = sequence_residual # alias

    ## PROCRUSTEAN sequence alignment
    def procrustean_alignment(self, allow_partial_sequences : bool=False) -> tuple[str, int]:
        '''
        PROCRUSTEAN: Periodic Repetition Of Cyclic Repeat Unit Sequences, Truncated to an Exact and Arbitrary Number
        Stretches or truncates the sequence kernel to achieve a target sequence length

        Algorithm produces a sequence string "P" and number of repeats "r" which, taken together, satisfy the following:
        - The number of units in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers
        - The units in P cycle through the units in S, in the order they appear in S
        - The number of times S is cycled through in P is always a rational multiple of the length of S
        If no satisfiable sequence-count pair can be found, raises an appropriate informative exception
        '''
        if not self.has_residual: # the case where the target length happens to consist of a whole number of repeats of the kernel
            if self.n_full_periods < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced my hand
                raise InsufficientChainLength(
                    f'{self.n_repeat_units}-monomer chain cannot accommodate both {self.n_repeat_units_terminal} end groups AND at least 1 middle monomer sequence'
                )
            sequence_procrustean = self.sequence_kernel
            n_seq_repeats = self.n_full_periods
        else:
            if not allow_partial_sequences:
                raise PartialBlockSequence(
                    f'Partial polymer block sequence required to meet target number of monomers ("{self.residual}" prefix of sequence "{self.sequence_kernel}");\n'
                    'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again'
                )
            sequence_procrustean = repeat_string_to_length(self.sequence_kernel, target_length=self.n_repeat_units_middle, joiner='')
            n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit)

        return sequence_procrustean, n_seq_repeats

    def describe_order(self, end_group_names : Optional[Iterable[str]]=None, default_end_group_name : str='END-GROUP') -> str:
        '''Descriptive string presenting a condensed view of the order of repeat units in the final sequence'''
        # Assign names for end groups
        if end_group_names is None:
            end_group_names = [f'[{default_end_group_name}]']*self.n_repeat_units_terminal
        else:
            end_group_names = [f'[{end_group_name}]' for end_group_name in end_group_names] # unpack into list and enforce correct number of names
            if (num_names_provided := len(end_group_names)) != self.n_repeat_units_terminal: # DEV: consider supporting filling in missing names with default in future
                raise IndexError(f'Defined sequence info with {self.n_repeat_units_terminal} end groups, but only provided names for {num_names_provided}')

        # Insert middle monomer parts as necessary
        sequence_middle = []
        if self.n_full_periods != 0: ## whole sequence strings
            sequence_middle.append(f'{self.n_full_periods}*[{self.sequence_kernel}]')
        if self.has_residual: ## partial sequence strings
            sequence_middle.append(f'[{self.residual}]')

        # Abut with correct amount of end group indicators
        sequence_parts = end_group_names[:]
        sequence_parts[1:-1] = sequence_middle

        return ' + '.join(sequence_parts)

    def describe_tally(self) -> str:
        '''Descriptive string indicating how all parts of the overall sequence contribute to the target number of repeat units'''
        desc_seq_counts_parts = []
        if self.n_full_periods != 0: ## whole sequence strings
            desc_seq_counts_parts.append(f'{self.n_full_periods} whole {self.block_size}-sequence repeat(s)')
        if self.has_residual: ## partial sequence strings
            desc_seq_counts_parts.append(f'a partial {self.n_residual_repeat_units}/{self.block_size} sequence repeat')

        return ' and '.join(desc_seq_counts_parts)
Dec 2024 14:49:26 -0700 Subject: [PATCH 155/191] Made LinearCopolymerSequencer serializable to/from JSON --- polymerist/polymers/building/sequencing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polymerist/polymers/building/sequencing.py b/polymerist/polymers/building/sequencing.py index 8693118..60b48ab 100644 --- a/polymerist/polymers/building/sequencing.py +++ b/polymerist/polymers/building/sequencing.py @@ -9,10 +9,12 @@ from typing import Iterable, Optional from dataclasses import dataclass, field, asdict -from polymerist.polymers.exceptions import EndGroupDominatedChain, InsufficientChainLength, EmptyBlockSequence, PartialBlockSequence -from polymerist.genutils.textual.substrings import shortest_repeating_substring, repeat_string_to_length +from ...genutils.textual.substrings import shortest_repeating_substring, repeat_string_to_length +from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable +from ..exceptions import EndGroupDominatedChain, InsufficientChainLength, EmptyBlockSequence, PartialBlockSequence +@make_jsonifiable @dataclass class LinearCopolymerSequencer: ''' From 013e09bdc46924af2289b8978c85f56d8d3af38a Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 14:49:43 -0700 Subject: [PATCH 156/191] Added RDKit-driven PDB writer for mbuild Compounds --- polymerist/polymers/building/__init__.py | 5 +- polymerist/polymers/building/mbconvert.py | 77 ++++++++++++++--------- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/polymerist/polymers/building/__init__.py b/polymerist/polymers/building/__init__.py index 8fdd0b7..dfe3e3d 100644 --- a/polymerist/polymers/building/__init__.py +++ b/polymerist/polymers/building/__init__.py @@ -15,4 +15,7 @@ ) from .linear import build_linear_polymer -from .mbconvert import mbmol_to_openmm_pdb, mbmol_from_mono_rdmol, mbmol_to_rdmol \ No newline at end of file +from .mbconvert import ( + mbmol_from_mono_rdmol, mbmol_to_rdmol, + mbmol_to_openmm_pdb, 
mbmol_to_rdkit_pdb, +) \ No newline at end of file diff --git a/polymerist/polymers/building/mbconvert.py b/polymerist/polymers/building/mbconvert.py index b7ae69f..ebef303 100644 --- a/polymerist/polymers/building/mbconvert.py +++ b/polymerist/polymers/building/mbconvert.py @@ -31,13 +31,13 @@ from mbuild.conversion import from_rdkit from ..monomers.specification import SANITIZE_AS_KEKULE -from ...genutils.decorators.functional import allow_string_paths +from ...genutils.decorators.functional import allow_string_paths, allow_pathlib_paths from ...rdutils.bonding.portlib import get_linker_ids from ...rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports from ...mdtools.openmmtools.serialization import serialize_openmm_pdb - +# Conversion from other formats to Compound def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tuple[Compound, list[int]]: ''' Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports @@ -55,32 +55,9 @@ def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tup mb_compound.name = resname return mb_compound, linker_ids - -@allow_string_paths -def mbmol_to_openmm_pdb( - pdb_path : Path, - mbmol : Compound, - num_atom_digits : int=2, - resname_map : Optional[dict[str, str]]=None, - ) -> None: - '''Save an MBuild Compound into an OpenMM-compatible PDB file''' - if resname_map is None: # avoid mutable default - resname_map = {'RES' : 'Pol'} - - traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) - omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory - - serialize_openmm_pdb( - pdb_path, - topology=omm_top, - positions=omm_pos, - uniquify_atom_ids=True, - num_atom_id_digits=num_atom_digits, - resname_map=resname_map - ) - -# TODO: deduplify PDB atom anme and residue numbering code against serialize_openmm_pdb() -def 
mbmol_to_rdmol( + +# Conversion from Compound to other formats +def mbmol_to_rdmol( # TODO: deduplify PDB atom name and residue numbering code against serialize_openmm_pdb() mbmol : Compound, uniquify_atom_ids : bool=False, num_atom_id_digits : int=2, @@ -123,4 +100,46 @@ def mbmol_to_rdmol( atom_id += 1 # TODO: this is an awful waay of keeping track of atom indices, see if there's a more secure way to do this conf_id = rdmol.AddConformer(conformer) - return rdmol \ No newline at end of file + return rdmol + +# Serialization of Compounds to files +@allow_string_paths +def mbmol_to_openmm_pdb( + pdb_path : Path, + mbmol : Compound, + num_atom_digits : int=2, + resname_map : Optional[dict[str, str]]=None, + ) -> None: + '''Save an MBuild Compound into an OpenMM-formatted PDB file''' + if resname_map is None: # avoid mutable default + resname_map = {'RES' : 'Pol'} + + traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) + omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory + + serialize_openmm_pdb( + pdb_path, + topology=omm_top, + positions=omm_pos, + uniquify_atom_ids=True, + num_atom_id_digits=num_atom_digits, + resname_map=resname_map + ) + +@allow_pathlib_paths +def mbmol_to_rdkit_pdb( + pdb_path : str, + mbmol : Compound, + num_atom_digits : int=2, + resname_map : Optional[dict[str, str]]=None, + ) -> None: + '''Save an MBuild Compound into an RDKit-formatted PDB file''' + Chem.MolToPDBFile( + mbmol_to_rdmol( + mbmol, + uniquify_atom_ids=True, + num_atom_id_digits=num_atom_digits, + resname_map=resname_map + ), + pdb_path, + ) \ No newline at end of file From fb3d898f2933eeacf641ef5de6a97d413dce2ae4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 14:56:54 -0700 Subject: [PATCH 157/191] Expanded out unit test modules for .polymers.building --- polymerist/tests/polymers/building/__init__.py | 4 ++++ .../{test_building.py => 
building/test_linear.py} | 6 ++---- polymerist/tests/polymers/building/test_sequencing.py | 8 ++++++++ 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 polymerist/tests/polymers/building/__init__.py rename polymerist/tests/polymers/{test_building.py => building/test_linear.py} (65%) create mode 100644 polymerist/tests/polymers/building/test_sequencing.py diff --git a/polymerist/tests/polymers/building/__init__.py b/polymerist/tests/polymers/building/__init__.py new file mode 100644 index 0000000..f4f43b4 --- /dev/null +++ b/polymerist/tests/polymers/building/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `building` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' \ No newline at end of file diff --git a/polymerist/tests/polymers/test_building.py b/polymerist/tests/polymers/building/test_linear.py similarity index 65% rename from polymerist/tests/polymers/test_building.py rename to polymerist/tests/polymers/building/test_linear.py index 799123f..f9f4721 100644 --- a/polymerist/tests/polymers/test_building.py +++ b/polymerist/tests/polymers/building/test_linear.py @@ -1,4 +1,4 @@ -'''Unit tests for `attrs` package''' +'''Tests construction of structures for linear copolymers (and relevant subfamilies, e.g. 
homopolymers)''' __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' @@ -13,6 +13,4 @@ @pytest.fixture def fragments_path() -> Path: - return get_file_path_within_package('peg=pla-pga.json', testdata) - -# Also add separate tests module for polymers.estimation + return get_file_path_within_package('peg-pla-pga.json', testdata) \ No newline at end of file diff --git a/polymerist/tests/polymers/building/test_sequencing.py b/polymerist/tests/polymers/building/test_sequencing.py new file mode 100644 index 0000000..2080dc7 --- /dev/null +++ b/polymerist/tests/polymers/building/test_sequencing.py @@ -0,0 +1,8 @@ +'''Testing that copolymer sequencing scales (and fails) as expected''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest +from pathlib import Path + From 677fedeb996f397219b069faf623dfb40c1c9e6e Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 15:42:09 -0700 Subject: [PATCH 158/191] Updated description of the "PROCRUSTEAN" acronym --- polymerist/polymers/building/sequencing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/polymers/building/sequencing.py b/polymerist/polymers/building/sequencing.py index 60b48ab..cd52c69 100644 --- a/polymerist/polymers/building/sequencing.py +++ b/polymerist/polymers/building/sequencing.py @@ -137,8 +137,8 @@ def sequence_residual(self) -> str: ## PROCRUSTEAN sequence alignment def procrustean_alignment(self, allow_partial_sequences : bool=False) -> tuple[str, int]: ''' - PROCRUSTEAN: Periodic Repetition Of Cyclic Repeat Unit Sequences, Truncated to an Exact and Arbitrary Number - Stretches or truncates the sequence kernel to achieve a target sequence length + PROCRUSTEAN: Periodic Recurrence Of Cyclic Repeat Unit Sequences, Truncated to an Exact and Arbitrary Number + Stretches or truncates the sequence kernel to achieve a target sequence length, cycling through the kernel's period as many times as needed 
Algorithm produces a sequence string "P" and number of repeats "r" which, taken together, satisfy the following: - The number of units in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers From 870a1eb560f892960e9020a38ded24edc759aaff Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 15:42:19 -0700 Subject: [PATCH 159/191] Wrote unit tests fo copolymer sequencing --- .../polymers/building/test_sequencing.py | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) diff --git a/polymerist/tests/polymers/building/test_sequencing.py b/polymerist/tests/polymers/building/test_sequencing.py index 2080dc7..272fd4d 100644 --- a/polymerist/tests/polymers/building/test_sequencing.py +++ b/polymerist/tests/polymers/building/test_sequencing.py @@ -3,6 +3,121 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' +from typing import Any +from dataclasses import asdict + import pytest from pathlib import Path +from polymerist.polymers.building.sequencing import LinearCopolymerSequencer as LCS +from polymerist.polymers.exceptions import EmptyBlockSequence, PartialBlockSequence, InsufficientChainLength, EndGroupDominatedChain + + +@pytest.fixture +def sequencer() -> LCS: + '''A sample sequencer with known, valid inputs''' + return LCS(sequence_kernel='ABAB', n_repeat_units=14, n_repeat_units_terminal=2) + +@pytest.mark.parametrize( + 'inputs', + [ + { + 'sequence_kernel' : 'AB', + 'n_repeat_units' : 10, + 'n_repeat_units_terminal' : 1 + }, + pytest.param( + { + 'sequence_kernel' : 'BAC', + 'n_repeat_units' : 1, + 'n_repeat_units_terminal' : 2 + }, + marks=pytest.mark.xfail( + raises=EndGroupDominatedChain, + reason='Results in (unsatisfiable) negative number of middle monomers', + strict=True, + ) + ), + pytest.param( + { + 'sequence_kernel' : '', + 'n_repeat_units' : 7, + 'n_repeat_units_terminal' : 1 + }, + marks=pytest.mark.xfail( + raises=EmptyBlockSequence, + reason='No sequence kernel 
provided', + strict=True, + ) + ), + ] +) +def test_LCS_input_validation(inputs : dict[str, Any]) -> None: + '''Test that invalid sequencer inputs are correctly rejected''' + _ = LCS(**inputs) # no assert needed, just checking when initialization completes + +def test_LCS_copying(sequencer : LCS) -> None: + '''Test that sequencers are properly copied in a read-only manner''' + sequencer_clone = sequencer.copy() + + # tamper with the parameters of the copy in a way that guarantees distinctness + sequencer_clone.sequence_kernel = 2*sequencer.sequence_kernel + sequencer_clone.n_repeat_units += 2 + sequencer_clone.n_repeat_units_terminal += 1 + + # check that the original WASN'T tampered with + assert asdict(sequencer) != asdict(sequencer_clone) + + +@pytest.mark.parametrize( + 'sequencer, expected_kernel', + [ + (LCS('ABC', n_repeat_units=12), 'ABC'), # test irreducible case + (LCS('ABAB', n_repeat_units=12), 'AB'), # test unreduced case + ] +) +def test_LCS_reduction(sequencer : LCS, expected_kernel : str) -> None: + '''Test that shortest repeating subsequences of sequencer kernels are correctly identified''' + sequencer.reduce() + assert sequencer.sequence_kernel == expected_kernel + +@pytest.mark.parametrize( + 'sequencer, allow_partials, expected_sequence, expected_length', + [ + # tests for homopolymers + (LCS('A', 5, 1), True , 'A', 4), + (LCS('A', 5, 1), False, 'A', 4), # partial block single-monomer sequence will never exist, so "allow_partial_sequences" setting shouldn't matter + pytest.param( + LCS('A', 1, 1), True, 'A', 1, # test that all-end group (i.e. no middle monomer) case is correctly rejected + marks=pytest.mark.xfail( + raises=InsufficientChainLength, + reason='No middle monomers can be accomodated', + strict=True, + ), + ), + # tests for "true" copolymers + (LCS('ABC', 10, 2), True, 'ABCABCAB', 1), + pytest.param( + LCS('ABC', 10, 2), False, 'ABCABCAB', 1, # test that partial-sequence ban correctly blocks partial sequences...
+ marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeats have not been allowed', + strict=True, + ), + ), + (LCS('ABC', 11, 2), False, 'ABC', 3), # ...unless the resulting sequence happens to be a whole multiple + pytest.param( + LCS('ABC', 2, 2), True, '', 1, # test that all-end group (i.e. no middle monomer) case is correctly rejected... + marks=pytest.mark.xfail( + raises=InsufficientChainLength, + reason='No middle monomers can be accomodated', + strict=True, + ), + ), + (LCS('ABC', 4, 2), True, 'AB', 1), # ... and finally, check that nonempty sequences SMALLER than the kernel are also recognized if partials are permitted + ] +) +def test_LCS_procrustean_alignment(sequencer : LCS, allow_partials : bool, expected_sequence : str, expected_length : int) -> None: + '''Test capability (and prechecks) for fitting sequence to target chain length''' + seq, n_reps = sequencer.procrustean_alignment(allow_partial_sequences=allow_partials) + assert (seq == expected_sequence) and (n_reps == expected_length) \ No newline at end of file From ce9c55c98c69e506f51a5afcbe93073bd11532eb Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 15:56:57 -0700 Subject: [PATCH 160/191] Updates SMILES/SMARTS-related type annotations on validation functions --- polymerist/smileslib/primitives.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polymerist/smileslib/primitives.py b/polymerist/smileslib/primitives.py index 844cad9..91c87a6 100644 --- a/polymerist/smileslib/primitives.py +++ b/polymerist/smileslib/primitives.py @@ -13,11 +13,11 @@ Smiles : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers Smarts : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers -def is_valid_SMARTS(smarts : str) -> bool: +def is_valid_SMARTS(smarts : Smarts) -> bool: '''Check if SMARTS string is valid (according to RDKit)''' 
return (Chem.MolFromSmarts(smarts) is not None) -def is_valid_SMILES(smiles : str) -> bool: +def is_valid_SMILES(smiles : Smiles) -> bool: '''Check if SMARTS string is valid (according to RDKit)''' return (Chem.MolFromSmiles(smiles) is not None) From 75fcceebb0322734d51b20c4d411378a93ff6343 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 16:05:16 -0700 Subject: [PATCH 161/191] Moved fragment data directly into code, as opposed to maintaining seprate data file --- polymerist/tests/data/peg-pla-pga.json | 38 -------------------------- polymerist/tests/polymers/__init__.py | 12 ++++++++ 2 files changed, 12 insertions(+), 38 deletions(-) delete mode 100644 polymerist/tests/data/peg-pla-pga.json diff --git a/polymerist/tests/data/peg-pla-pga.json b/polymerist/tests/data/peg-pla-pga.json deleted file mode 100644 index a865ed5..0000000 --- a/polymerist/tests/data/peg-pla-pga.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "__class__": "MonomerGroup", - "__values__": { - "monomers": { - "PEG-1A": [ - "[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]" - ], - "PEG-1B": [ - "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]" - ], - "PEG-2": [ - "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]" - ], - "PLA-1A": [ - "[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]" - ], - "PLA-1B": [ - "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]" - ], - "PLA-2": [ - "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]" - ], - "PGA-1A": [ - "[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]" - ], - "PGA-1B": [ - 
"[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]" - ], - "PGA-2": [ - "[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]" - ] - }, - "term_orient": { - "head": "PEG-1A", - "tail": "PEG_1B" - } - } -} \ No newline at end of file diff --git a/polymerist/tests/polymers/__init__.py b/polymerist/tests/polymers/__init__.py index f37a37e..3adfea5 100644 --- a/polymerist/tests/polymers/__init__.py +++ b/polymerist/tests/polymers/__init__.py @@ -2,3 +2,15 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' + +PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers for testing + 'PEG-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], + 'PEG-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + 'PEG-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + 'PLA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]'], + 'PLA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]'], + 'PLA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]'], + 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], + 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'] +} From 59f988aad938f262d162942192e39cd4b5a3cc0b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 16:05:34 -0700 
Subject: [PATCH 162/191] Removed superfluous mBuild imports --- polymerist/polymers/building/linear.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/polymerist/polymers/building/linear.py b/polymerist/polymers/building/linear.py index 62e90a6..b1a5fd4 100644 --- a/polymerist/polymers/building/linear.py +++ b/polymerist/polymers/building/linear.py @@ -9,8 +9,6 @@ import warnings with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings warnings.filterwarnings('ignore', category=DeprecationWarning) - import mbuild as mb - from mbuild import Compound from mbuild.lib.recipes.polymer import Polymer as MBPolymer from .mbconvert import mbmol_from_mono_rdmol From 29d3198bfe71ed7edf706d547d01b288841cd6d9 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 16:11:57 -0700 Subject: [PATCH 163/191] Added devnote to revisit SMARTS-specification auto-cleaning --- polymerist/polymers/monomers/repr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index 174bd80..6003228 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -46,7 +46,8 @@ def __post_init__(self) -> None: # check that all SMARTS are valid for i, smarts in enumerate(smarts_list): # we can now be sure that this is a list of SMARTS strings if not is_valid_SMARTS(smarts): - raise ValueError(f'Provided invalid monomer SMARTS string for {resname}[{i}]: "{smarts}"') + raise ValueError(f'Provided invalid monomer SMARTS string for {resname}[{i}]: "{smarts}"') + # DEV: decide whether or not SMILES expansion and spec-compliance should be enforced here or shunted off to the user # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here @staticmethod From 7afceed37608e8e8a8c74b37f02e15acd2357975 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 
16:23:46 -0700 Subject: [PATCH 164/191] Added devnote for spec compliance checker --- polymerist/polymers/monomers/specification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/polymerist/polymers/monomers/specification.py b/polymerist/polymers/monomers/specification.py index 0731105..8561dd4 100644 --- a/polymerist/polymers/monomers/specification.py +++ b/polymerist/polymers/monomers/specification.py @@ -98,6 +98,7 @@ def compliant_atom_query_from_re_match(match : re.Match) -> str: # CONVERSION METHODS +## DEV: add function to check whether a given SMARTS is COMPLETELY spec-compliant def compliant_mol_SMARTS(smarts : str) -> str: '''Convert generic SMARTS string into a spec-compliant one''' # initial checks From d6d5880a6037c0f1fdde7f58019280af62037060 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 16:52:45 -0700 Subject: [PATCH 165/191] Added MPD-TMC polyamide fragments for examples --- polymerist/tests/polymers/__init__.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/polymerist/tests/polymers/__init__.py b/polymerist/tests/polymers/__init__.py index 3adfea5..a5126d8 100644 --- a/polymerist/tests/polymers/__init__.py +++ b/polymerist/tests/polymers/__init__.py @@ -3,14 +3,27 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers for testing +PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers + # PEG (ethylene glycol) 'PEG-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], 'PEG-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], 'PEG-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + # PLA (lactic acid) 'PLA-1A': 
['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]'], 'PLA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]'], 'PLA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]'], + # PGA (glycolic acid) 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], - 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'] + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], +} + +MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane + # MPD (m-phenyl diamine) + 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], + 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], + # TMC (trimesoyl chloride) + 'TMC-1': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], + 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], + 'TMC-3': 
['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], } From 0e7749faba1916f5c82b92726ae24db26c51ce42 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 17:24:39 -0700 Subject: [PATCH 166/191] Added unit tests for MonomerGroup initialization and core properties --- .../tests/polymers/monomers/test_repr.py | 147 ++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 polymerist/tests/polymers/monomers/test_repr.py diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py new file mode 100644 index 0000000..3c90e43 --- /dev/null +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -0,0 +1,147 @@ +'''Tests that collections of monomer fragments are treated as expected''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from typing import Any + +import pytest + +from ..import PEG_PLGA_FRAGMENTS, MPD_TMC_FRAGMENTS +from polymerist.polymers.monomers.repr import MonomerGroup + + +@pytest.fixture(scope='function') # want to re-initialize for each test function to avoid cross-contamination +def monogrp_peg_plga() -> MonomerGroup: + return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) + +@pytest.fixture(scope='function') # want to re-initialize for each test function to avoid cross-contamination +def monogrp_mpd_tmc() -> MonomerGroup: + return MonomerGroup(monomers=MPD_TMC_FRAGMENTS) + +# Testing all routes to initialization +@pytest.mark.parametrize( + 'monomers', + [ + { # nominal test case + 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], + 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], + 'PGA-2': 
['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], + }, + { # test that list closure autofill works + 'PGA-1A': '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]', + 'PGA-1B': '[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]', + 'PGA-2': '[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]', + }, + # XFAILS: test that the initializer rejects... + pytest.param( + { # ...1) non-string like objects + 'foo' : 42.0, + 'bar' : True, + }, + marks=pytest.mark.xfail( + raises=TypeError, + reason='Monomer fragment inputs are not stringlike', + strict=True, + ), + ), + pytest.param( + { # 1a) more subtly, list OF CONTAINERS of valid SMARTS are still invalid + 'PGA-1A': [( + '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]', + '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]' + )], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], + }, + marks=pytest.mark.xfail( + raises=TypeError, + reason='Monomer fragment inputs are not stringlike', + strict=True, + ), + ), + pytest.param( + { # ...2) empty lists + 'PGA-1A': [], + 'PGA-2' : [], + }, + marks=pytest.mark.xfail( + raises=IndexError, + reason='At least one monomer fragment input is empty', + strict=True, + ), + ), + pytest.param( + { # ...3) non-empty strings which are nevertheless invalid SMARTS + #- NOTE: empty strings, perhaps surprisingly, actually ARE valid as SMARTS and therefore aren't xfail tested here + 'fake-1': ['this is a bogus SMARTS'], + 'invalid-2': ['so_is_this'], + }, + marks=pytest.mark.xfail( + raises=ValueError, + reason='At least one monomer fragment input is not valid a SMARTS string', + strict=True, + ), + ), + pytest.param( + { # ...3a) this one is very subtle, but SMARTS with slight errors which invalidate 
themas SMARTS should also fail + 'PGA-1A': ['[OH]CD(=O)*'], # fat-finger mistake, "D" should be "C" + 'PGA-2': ['*OCC(+O)*'], # forgot to hit shift when typing double bond + }, + marks=pytest.mark.xfail( + raises=ValueError, + reason='At least one monomer fragment input is not valid a SMARTS string', + strict=True, + ), + ), + ] +) +def test_monogrp_init(monomers : dict[str, Any]) -> None: + '''Check that the MonomerGroup initializer handles valid inputs as expected and reject invalid inputs''' + _ = MonomerGroup(monomers=monomers) # no assert needed, just checking when initialization completes + +# Testing properties of contained monomers +@pytest.mark.parametrize( + 'monogrp, expected_is_linear', + [ + ('monogrp_peg_plga', True), + ('monogrp_mpd_tmc', False), + ], +) +def test_monogrp_linearity(monogrp : MonomerGroup, expected_is_linear : bool, request : pytest.FixtureRequest) -> None: + '''Test whether branched and unbranched chain fragment detection behaves as expected''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + assert monogrp.is_linear == expected_is_linear + +@pytest.mark.parametrize( + 'monogrp, expected_counts', + [ + ('monogrp_peg_plga', (3, 6)), + ('monogrp_mpd_tmc', (3, 2)), + ], +) +def test_monogrp_mid_and_term_counts(monogrp : MonomerGroup, expected_counts : tuple[int, int], request : pytest.FixtureRequest) -> None: + '''Test whether middle and terminal monomers are counted correctly''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + assert monogrp.num_mid_and_term == expected_counts + +# Testing end group determination +@pytest.mark.parametrize( + 'monogrp, term_orient, expected_end_groups', + [ + ('monogrp_peg_plga', {}, {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}), # test autogeneration from first 2 when + ('monogrp_mpd_tmc', (3, 2)), + ], +) +def test_monogrp_end_groups(monogrp : MonomerGroup, term_orient : dict[str, str], expected_end_groups : dict[str, 
str], request : pytest.FixtureRequest) -> None: + '''Test whether procedural end group determination''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + monogrp.term_orient = term_orient + + end_group_catalogue = monogrp.linear_end_groups() + end_group_names = { + head_or_tail : resname # drop RDKit Mol for check (Mol object is harder to validate, use bound name as proxy) + for head_or_tail, (resname, _) in end_group_catalogue.items() + } + + assert end_group_names == expected_end_groups + \ No newline at end of file From e33ca3d0d8b914eb6aa65cded2734cc9e8bad8c5 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 17:28:52 -0700 Subject: [PATCH 167/191] Added polyethylene example to test when fewer than the max 2 end group templates are present --- polymerist/tests/polymers/__init__.py | 27 ++++++++++++------- .../tests/polymers/monomers/test_repr.py | 18 ++++++++++--- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/polymerist/tests/polymers/__init__.py b/polymerist/tests/polymers/__init__.py index a5126d8..d0ddb85 100644 --- a/polymerist/tests/polymers/__init__.py +++ b/polymerist/tests/polymers/__init__.py @@ -3,6 +3,23 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' + +PE_FRAGMENTS : dict[str, list[str]] = { + # PE (polyethylene) + 'PE1': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:6])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:4])-[#1D1+0:5]'], + 'PE2': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:5])-[#1D1+0:6]'], +} + +MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane + # MPD (m-phenyl diamine) + 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], + 'MPD-2': 
['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], + # TMC (trimesoyl chloride) + 'TMC-1': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], + 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], + 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], +} + PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers # PEG (ethylene glycol) 'PEG-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], @@ -17,13 +34,3 @@ 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], } - -MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane - # MPD (m-phenyl diamine) - 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], - 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], - # TMC (trimesoyl chloride) - 'TMC-1': 
['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], - 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], - 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], -} diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py index 3c90e43..4b5a9ae 100644 --- a/polymerist/tests/polymers/monomers/test_repr.py +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -7,18 +7,28 @@ import pytest -from ..import PEG_PLGA_FRAGMENTS, MPD_TMC_FRAGMENTS +from ..import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS from polymerist.polymers.monomers.repr import MonomerGroup +# Example fragments groups @pytest.fixture(scope='function') # want to re-initialize for each test function to avoid cross-contamination -def monogrp_peg_plga() -> MonomerGroup: - return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) +def monogrp_degenerate() -> MonomerGroup: + return MonomerGroup(monomers={}) -@pytest.fixture(scope='function') # want to re-initialize for each test function to avoid cross-contamination +@pytest.fixture(scope='function') +def monogrp_polyethylene() -> MonomerGroup: + return MonomerGroup(monomers=PE_FRAGMENTS) + +@pytest.fixture(scope='function') def monogrp_mpd_tmc() -> MonomerGroup: return MonomerGroup(monomers=MPD_TMC_FRAGMENTS) +@pytest.fixture(scope='function') +def monogrp_peg_plga() -> MonomerGroup: + return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) + + # Testing all routes to initialization 
@pytest.mark.parametrize( 'monomers', From 0074e4a8f2ced05bfd8e9127d34125166c59fff2 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 17:43:03 -0700 Subject: [PATCH 168/191] Wrote unit test for end group identification --- .../tests/polymers/monomers/test_repr.py | 45 ++++++++++++++++++- 1 file changed, 43 insertions(+), 2 deletions(-) diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py index 4b5a9ae..2433f76 100644 --- a/polymerist/tests/polymers/monomers/test_repr.py +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -138,8 +138,49 @@ def test_monogrp_mid_and_term_counts(monogrp : MonomerGroup, expected_counts : t @pytest.mark.parametrize( 'monogrp, term_orient, expected_end_groups', [ - ('monogrp_peg_plga', {}, {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}), # test autogeneration from first 2 when - ('monogrp_mpd_tmc', (3, 2)), + # 1) test autogeneration of orientations when... + ( # ...term orientation is unspecified but can be completed for both ends (i.e. at least 2 terminal monomers are available) + 'monogrp_peg_plga', + {}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + ( # ...term orientation is unspecified and can only be partially completed (i.e. fewer than 2 terminal monomers are available) + 'monogrp_polyethylene', + {}, + {'head' : 'PE1'}, + ), + ( # ...term orientation is unspecified but can be completed for both ends (i.e. 
at least 2 terminal monomers are available) + 'monogrp_peg_plga', + {}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + # 2) test end group identification for correctly-specified term orientation + ( # test nominal case + 'monogrp_peg_plga', + {'head' : 'PGA-1A', 'tail' : 'PEG-1B'}, + {'head' : 'PGA-1A', 'tail' : 'PEG-1B'}, + ), + ( # test that duplication works as expected + 'monogrp_polyethylene', + {'head' : 'PE1', 'tail' : 'PE1'}, + {'head' : 'PE1', 'tail' : 'PE1'}, + ), + # 3) test incorrect specifications + ( # specification without "head"/"tail" keys will not fail, but WILL default to auto-gen + 'monogrp_peg_plga', + {'first' : 'PGA-1A', 'second' : 'PEG-1B'}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + pytest.param( # specification with invalid monomer names (i.e. keys not in the "monomers" dict) should raise outright error + 'monogrp_peg_plga', + {'head' : 'PGG-2C', 'tail' : 'BOGUS'}, + None, + marks=pytest.mark.xfail( + raises=KeyError, + reason='Term group names specified don;t existing within the monomer fragments defined', + strict=True, + ) + ), ], ) def test_monogrp_end_groups(monogrp : MonomerGroup, term_orient : dict[str, str], expected_end_groups : dict[str, str], request : pytest.FixtureRequest) -> None: From f27263fe7e657bba00c13647bf2f1b78955069b6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 19:56:03 -0700 Subject: [PATCH 169/191] Expanded syntax and support for addition/validation of new monomer SMARTS into MonomerGroup --- polymerist/polymers/monomers/repr.py | 71 ++++++++++++++----- .../tests/polymers/monomers/test_repr.py | 13 +--- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index 6003228..3cdc225 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -6,7 +6,7 @@ import logging LOGGER = logging.getLogger(__name__) -from typing import Generator, Optional, Union +from typing 
import Generator, Optional, Iterable, Union from dataclasses import dataclass, field from itertools import cycle @@ -31,36 +31,62 @@ class MonomerGroup: def __post_init__(self) -> None: # Encase bare SMARTS into lists and check that all monomer SMARTS are valid - for resname, smarts_seq in self.monomers.items(): - if isinstance(smarts_seq, list): - if not smarts_seq: - raise IndexError(f'Empty monomer declaration for "{resname}"') # catch case where empty list if provided (would slip through subsequent checks otherwise) - smarts_list = smarts_seq # no modification needed - elif isinstance(smarts_seq, str): - LOGGER.warning(f'Wrapping bare monomer SMARTS in list to comply with spec (storing as ["{smarts_seq}"])') - smarts_list = [smarts_seq] # wrap lone SMARTS string in list - self.monomers[resname] = smarts_list # update value internally (doesn't change size of dict) - else: - raise TypeError(f'Values of monomers must be either SMARTS strings or lists of SMARTS strings, not "{type(smarts_seq).__name__}"') - - # check that all SMARTS are valid - for i, smarts in enumerate(smarts_list): # we can now be sure that this is a list of SMARTS strings - if not is_valid_SMARTS(smarts): - raise ValueError(f'Provided invalid monomer SMARTS string for {resname}[{i}]: "{smarts}"') - # DEV: decide whether or not SMILES expansion and spec-compliance should be enforced here or shunted off to the user + monomers_init = self.monomers # store inputted values + self.monomers = {} # clear monomers and re-add one-at-a-time + for resname, smarts in monomers_init.items(): + self.add_monomer(resname, smarts) # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here + # ATTRIBUTE PROPERTIES AND ALIASES @staticmethod def is_terminal(monomer : Chem.Mol) -> bool: '''Determine whether or not a monomer is terminal''' return get_num_ports(monomer) == 1 + + def _add_monomer(self, resname : str, smarts : Smarts) -> None: + '''Add a new 
if isinstance(smarts, Iterable) and not isinstance(smarts, str): # don't want to insert one character at a time if a string is in fact provided
- # ATTRIBUTE PROPERTIES AND ALIASES + def __setitem__(self, resname : str, smarts : Smarts) -> str: + '''Convenience method to access .monomers directly from instance''' + self.add_monomer(resname, smarts) + @property def SMARTS(self) -> dict[str, list[Smarts]]: '''Alias of legacy "monomers" attribute''' return self.monomers # alias of legacy name for convenience + # ITERATION OVER STORED MOLECULE FRAGMENTS def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, Chem.Mol], None, None]: ''' Generate (residue name, RDKit Mol) pairs of all monomers present @@ -92,6 +118,13 @@ def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Chem.Mol]]: return rdmol_dict + def contributions(self, term_only : Optional[bool]=None) -> dict[str, list[int]]: + '''Returns dict of the number of real (i.e. non-linker) atoms in each residue list''' + return { + resname : [mol.GetNumAtoms() - get_num_ports(mol) for mol in mol_list] + for resname, mol_list in self.rdmols(term_only=term_only).items() + } + @property def n_monomers(self) -> int: '''Returns number of present monomers; multiple monomers under the same residue name are considered distinct''' diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py index 2433f76..8c6d462 100644 --- a/polymerist/tests/polymers/monomers/test_repr.py +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -69,17 +69,6 @@ def monogrp_peg_plga() -> MonomerGroup: strict=True, ), ), - pytest.param( - { # ...2) empty lists - 'PGA-1A': [], - 'PGA-2' : [], - }, - marks=pytest.mark.xfail( - raises=IndexError, - reason='At least one monomer fragment input is empty', - strict=True, - ), - ), pytest.param( { # ...3) non-empty strings which are nevertheless invalid SMARTS #- NOTE: empty strings, perhaps surprisingly, actually ARE valid as SMARTS and therefore aren't xfail tested here @@ -93,7 +82,7 @@ def monogrp_peg_plga() -> MonomerGroup: ), ), pytest.param( - { # 
+        # this is not unique to __getitem__ but rather a consequence of thinly-wrapping builtin types
when NO terminal monomers are present) --- polymerist/tests/polymers/monomers/test_repr.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py index 8c6d462..ee58435 100644 --- a/polymerist/tests/polymers/monomers/test_repr.py +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -138,6 +138,11 @@ def test_monogrp_mid_and_term_counts(monogrp : MonomerGroup, expected_counts : t {}, {'head' : 'PE1'}, ), + ( # ...term orientation is unspecified and no end monomers are available for auto-assignment + 'monogrp_degenerate', + {}, + {}, + ), ( # ...term orientation is unspecified but can be completed for both ends (i.e. at least 2 terminal monomers are available) 'monogrp_peg_plga', {}, From bda08d844339e1870cf3b2c96f2512cb7b8cc342 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Mon, 9 Dec 2024 21:17:48 -0700 Subject: [PATCH 172/191] Attempted (unsuccessfully) to get __hash__ working for MonomerGroup --- polymerist/polymers/monomers/repr.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index 505ff70..73f581b 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -29,6 +29,7 @@ class MonomerGroup: monomers : dict[str, Union[Smarts, list[Smarts]]] = field(default_factory=dict) term_orient : dict[str, str] = field(default_factory=dict) # keys are either "head" or "tail", values are the names of residues in "monomers" + # MONOMER ADDITION AND VALIDATION def __post_init__(self) -> None: # Encase bare SMARTS into lists and check that all monomer SMARTS are valid monomers_init = self.monomers # store inputted values @@ -37,12 +38,6 @@ def __post_init__(self) -> None: self.add_monomer(resname, smarts) # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here - # 
ATTRIBUTE PROPERTIES AND ALIASES - @staticmethod - def is_terminal(monomer : Chem.Mol) -> bool: - '''Determine whether or not a monomer is terminal''' - return get_num_ports(monomer) == 1 - def _add_monomer(self, resname : str, smarts : Smarts) -> None: '''Add a new monomer to the templates already stored within, subject to validation checks''' if not isinstance(smarts, str): @@ -73,6 +68,7 @@ def add_monomer(self, resname : str, smarts : Union[Smarts, Iterable[Smarts]]) - else: self._add_monomer(resname, smarts) # assume any other inputs are singular values or strings + # DUNDER "MAGIC" METHODS def __getitem__(self, resname : str) -> str: '''Convenience method to access .monomers directly from instance''' return self.monomers[resname] # NOTE: deliberately avoid "get()" here to propagate KeyError @@ -82,6 +78,17 @@ def __getitem__(self, resname : str) -> str: def __setitem__(self, resname : str, smarts : Smarts) -> str: '''Convenience method to access .monomers directly from instance''' self.add_monomer(resname, smarts) + + def __hash__(self) -> int: + '''Hash based on monomer SMARTS and terminal orientation in a canonical order''' + # TOSELF: this is far from bulletproof, viz. 
canonicalization of SMARTS, list value sorting, etc
import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS -from polymerist.polymers import building +from collections import Counter -@pytest.fixture -def fragments_path() -> Path: - return get_file_path_within_package('peg-pla-pga.json', testdata) \ No newline at end of file +from polymerist.polymers.building import build_linear_polymer +from polymerist.polymers.monomers.repr import MonomerGroup +from polymerist.polymers.exceptions import MorphologyError, PartialBlockSequence, EmptyBlockSequence + + +@pytest.fixture(scope='function') +def monogrp_polyethylene() -> MonomerGroup: + return MonomerGroup(monomers=PE_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_mpd_tmc() -> MonomerGroup: + return MonomerGroup(monomers=MPD_TMC_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_peg_plga() -> MonomerGroup: + return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) + + +@pytest.mark.parametrize( + 'monomers, term_orient, n_monomers, sequence, minimize_sequence, allow_partial_sequences, energy_minimize', + [ + # Polyethylene + ('monogrp_polyethylene', {}, 7, 'A', True, True, False), # test end group autogen (should only have 1 term group) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, True, False), # test explicit head-tail (result here should be different from autogen structure) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, False, False), # test partial sequences (irrelevant here) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, False, True), # test energy minimization doesn't crash + pytest.param( # will fail due to too few monomers for given sequence - + 'monogrp_polyethylene', {}, 7, '', True, True, False, # NOTE: need to have partials enabled, since failure happens ONLY once sequence is passed to mbuild + marks=pytest.mark.xfail( + raises=EmptyBlockSequence, + reason='Sequence provided must be nonempty', + strict=True, + ) + ), + pytest.param( # will fail due to too few 
monomers for given sequence - + 'monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'AB', True, True, False, # NOTE: need to have partials enabled, since failure happens ONLY once sequence is passed to mbuild + marks=pytest.mark.xfail( + raises=ValueError, + reason='Fewer unique monomers defined than called for by target sequence', + strict=True, + ) + ), + # MPD-TMC + ('monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 8, 'A', True, True, False), # correctly-specified: explicit end groups, only linear middle monomers, and whole number of sequence repeats + pytest.param( + 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 7, 'AB', True, False, False, # will fail due to partial sequence + marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeat needed to get odd number block out of AB, but partial blocks are disabled', + strict=True, + ) + ), + pytest.param( + 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 8, 'AB', True, True, False, # will fail due to 3-functional TMC middle monomer as B + marks=pytest.mark.xfail( + raises=MorphologyError, + reason='One of the monomers requested is non-linear (3-functional)', + strict=True, + ) + ), + # PEG-PLGA + ('monogrp_peg_plga', {}, 15, 'ABC', True, True, False), # test autogen + ('monogrp_peg_plga', {}, 17, 'ABC', True, False, False), # test autogen with whole sequence + ('monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, True, False), # test more complex sequence with non-default explicit end groups + pytest.param( + 'monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, False, False, # will fail due to partial + marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeat needed to get odd number block out of AB, but partial blocks are disabled', + strict=True, + ) + ), + ('monogrp_peg_plga', {}, 40, 'ABCB', True, True, True), # test longer energy min + ] +) +def test_build_linear_polymer( + 
+    '''Test linear polymer builder behavior under varying sets of parameters'''
+ + assert all([total_reps_match, contribs_match, end_groups_correct]) #, and counts_match ) \ No newline at end of file From 4e3148875a11da8d18666545cb161eec286ccd8b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 15:05:20 -0700 Subject: [PATCH 174/191] Fixed indent on openff_topology_to_openmm() arguments --- polymerist/mdtools/openfftools/omminter/mdobjects.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/polymerist/mdtools/openfftools/omminter/mdobjects.py b/polymerist/mdtools/openfftools/omminter/mdobjects.py index 2ed2be9..f7dc455 100644 --- a/polymerist/mdtools/openfftools/omminter/mdobjects.py +++ b/polymerist/mdtools/openfftools/omminter/mdobjects.py @@ -39,8 +39,13 @@ def forcefield_flexible(forcefield : Union[ForceField, str, Path]) -> ForceField return ForceField(ff_path) -def openff_topology_to_openmm(offtop : OFFTopology, forcefield : Union[ForceField, str, Path], box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None, - combine_nonbonded_forces : bool=False, add_constrained_forces : bool=False) -> tuple[OMMTopology, System, Quantity]: +def openff_topology_to_openmm( + offtop : OFFTopology, + forcefield : Union[ForceField, str, Path], + box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None, + combine_nonbonded_forces : bool=False, + add_constrained_forces : bool=False + ) -> tuple[OMMTopology, System, Quantity]: '''Converts an OpenFF Topology to an OpenMM Topology, System, and Positions''' if box_vecs is not None: offtop.box_vectors = box_vectors_flexible(box_vecs) From 051d128a012aaf8b9359974455f1c120b598b871 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 16:29:56 -0700 Subject: [PATCH 175/191] Removed deprecated local TKREGS import --- polymerist/mdtools/openfftools/solvation/solvents/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py 
b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py index f540da0..a11bf94 100644 --- a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py +++ b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py @@ -10,7 +10,6 @@ from openff.units import unit as offunit from ... import topology -from ... import TKREGS def generate_water_TIP3P() -> Molecule: From e0cdfc583e443c8fcea7c0d25d0272037107e4a6 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 16:30:11 -0700 Subject: [PATCH 176/191] Moved unitsys outside of omminter to resolve circular import --- polymerist/mdtools/openfftools/boxvectors.py | 2 +- polymerist/mdtools/openfftools/omminter/__init__.py | 6 ------ polymerist/mdtools/openfftools/omminter/mdobjects.py | 2 +- polymerist/mdtools/openfftools/solvation/physprops.py | 2 +- polymerist/mdtools/openfftools/{omminter => }/unitsys.py | 0 5 files changed, 3 insertions(+), 9 deletions(-) rename polymerist/mdtools/openfftools/{omminter => }/unitsys.py (100%) diff --git a/polymerist/mdtools/openfftools/boxvectors.py b/polymerist/mdtools/openfftools/boxvectors.py index 49ea028..7495d6a 100644 --- a/polymerist/mdtools/openfftools/boxvectors.py +++ b/polymerist/mdtools/openfftools/boxvectors.py @@ -14,7 +14,7 @@ from openff.toolkit import Topology from openff.interchange.components._packmol import _box_vectors_are_in_reduced_form -from .omminter.unitsys import allow_openmm_units, openff_to_openmm +from .unitsys import allow_openmm_units, openff_to_openmm # CUSTOM TYPES FOR CLARITY, ESPECIALLY WITH UNITS diff --git a/polymerist/mdtools/openfftools/omminter/__init__.py b/polymerist/mdtools/openfftools/omminter/__init__.py index db12755..e1522de 100644 --- a/polymerist/mdtools/openfftools/omminter/__init__.py +++ b/polymerist/mdtools/openfftools/omminter/__init__.py @@ -4,9 +4,3 @@ __email__ = 'timotej.bernat@colorado.edu' from .mdobjects import forcefield_flexible, openff_topology_to_openmm -from .unitsys import ( - 
openmm_to_openff, - openff_to_openmm, - allow_openmm_units, - allow_openff_units, -) \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/omminter/mdobjects.py b/polymerist/mdtools/openfftools/omminter/mdobjects.py index f7dc455..7f0af55 100644 --- a/polymerist/mdtools/openfftools/omminter/mdobjects.py +++ b/polymerist/mdtools/openfftools/omminter/mdobjects.py @@ -15,7 +15,7 @@ from openmm.app import Topology as OMMTopology from openmm.unit import Quantity -from .unitsys import openff_to_openmm +from ..unitsys import openff_to_openmm from .. import FFDIR from ..boxvectors import box_vectors_flexible, VectorQuantity, BoxVectorsQuantity diff --git a/polymerist/mdtools/openfftools/solvation/physprops.py b/polymerist/mdtools/openfftools/solvation/physprops.py index 1de70a2..bd9f108 100644 --- a/polymerist/mdtools/openfftools/solvation/physprops.py +++ b/polymerist/mdtools/openfftools/solvation/physprops.py @@ -15,7 +15,7 @@ from openff.units import Quantity as OFFQuantity from ....unitutils.dimensions import is_volume -from ..omminter.unitsys import allow_openff_units, openff_to_openmm +from ..unitsys import allow_openff_units, openff_to_openmm # MASS diff --git a/polymerist/mdtools/openfftools/omminter/unitsys.py b/polymerist/mdtools/openfftools/unitsys.py similarity index 100% rename from polymerist/mdtools/openfftools/omminter/unitsys.py rename to polymerist/mdtools/openfftools/unitsys.py From f416a32088d2bf8880d27b3abc1840b7dee2822b Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 17:19:45 -0700 Subject: [PATCH 177/191] Moved sample monomer fragment sets from unit tests to polymerist proper --- polymerist/polymers/monomers/__init__.py | 7 +++- polymerist/polymers/monomers/fragments.py | 32 +++++++++++++++++++ polymerist/tests/polymers/__init__.py | 32 ------------------- .../tests/polymers/building/test_linear.py | 7 ++-- .../tests/polymers/monomers/test_repr.py | 2 +- 5 files changed, 42 insertions(+), 38 deletions(-) create 
mode 100644 polymerist/polymers/monomers/fragments.py diff --git a/polymerist/polymers/monomers/__init__.py b/polymerist/polymers/monomers/__init__.py index 176a2ff..7e50795 100644 --- a/polymerist/polymers/monomers/__init__.py +++ b/polymerist/polymers/monomers/__init__.py @@ -3,4 +3,9 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from .repr import MonomerGroup # make monomer representation available at the module level \ No newline at end of file +from .repr import MonomerGroup # make monomer representation available at the module level +from .fragments import ( + PE_FRAGMENTS, + MPD_TMC_FRAGMENTS, + PEG_PLGA_FRAGMENTS, +) \ No newline at end of file diff --git a/polymerist/polymers/monomers/fragments.py b/polymerist/polymers/monomers/fragments.py new file mode 100644 index 0000000..a563d87 --- /dev/null +++ b/polymerist/polymers/monomers/fragments.py @@ -0,0 +1,32 @@ +'''Catalogue of monomer fragment templates for some common polymer systems''' + +PE_FRAGMENTS : dict[str, list[str]] = { + # PE (polyethylene) + 'PE1': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:6])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:4])-[#1D1+0:5]'], + 'PE2': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:5])-[#1D1+0:6]'], +} + +MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane + # MPD (m-phenyl diamine) + 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], + 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], + # TMC (trimesoyl chloride) + 'TMC-1': 
['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], + 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], + 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], +} + +PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers + # PEG (ethylene glycol) + 'PEG-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], + 'PEG-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + 'PEG-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + # PLA (lactic acid) + 'PLA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]'], + 'PLA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]'], + 'PLA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]'], + # PGA (glycolic acid) + 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], + 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], +} \ 
No newline at end of file diff --git a/polymerist/tests/polymers/__init__.py b/polymerist/tests/polymers/__init__.py index d0ddb85..f37a37e 100644 --- a/polymerist/tests/polymers/__init__.py +++ b/polymerist/tests/polymers/__init__.py @@ -2,35 +2,3 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' - - -PE_FRAGMENTS : dict[str, list[str]] = { - # PE (polyethylene) - 'PE1': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:6])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:4])-[#1D1+0:5]'], - 'PE2': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:5])-[#1D1+0:6]'], -} - -MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane - # MPD (m-phenyl diamine) - 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], - 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], - # TMC (trimesoyl chloride) - 'TMC-1': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], - 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], - 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], -} - -PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of PEG-PLGA-like polymers - # PEG (ethylene glycol) - 'PEG-1A': 
['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], - 'PEG-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], - 'PEG-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], - # PLA (lactic acid) - 'PLA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]'], - 'PLA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]'], - 'PLA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]'], - # PGA (glycolic acid) - 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], - 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], - 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], -} diff --git a/polymerist/tests/polymers/building/test_linear.py b/polymerist/tests/polymers/building/test_linear.py index 1c06206..f9be153 100644 --- a/polymerist/tests/polymers/building/test_linear.py +++ b/polymerist/tests/polymers/building/test_linear.py @@ -4,14 +4,13 @@ __email__ = 'timotej.bernat@colorado.edu' import pytest -from pathlib import Path - -from .. 
import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS from collections import Counter -from polymerist.polymers.building import build_linear_polymer from polymerist.polymers.monomers.repr import MonomerGroup +from polymerist.polymers.monomers.fragments import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS + +from polymerist.polymers.building import build_linear_polymer from polymerist.polymers.exceptions import MorphologyError, PartialBlockSequence, EmptyBlockSequence diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py index ee58435..bf93cdf 100644 --- a/polymerist/tests/polymers/monomers/test_repr.py +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -7,8 +7,8 @@ import pytest -from ..import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS from polymerist.polymers.monomers.repr import MonomerGroup +from polymerist.polymers.monomers.fragments import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS # Example fragments groups From a77090663adcb6af9f7fdbfa654a41ad2e6e81a0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 17:24:47 -0700 Subject: [PATCH 178/191] Corrected typo in end group autogen warning --- polymerist/polymers/monomers/repr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index 73f581b..0aa0d8d 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -170,7 +170,7 @@ def linear_end_groups(self) -> dict[str, tuple[str, Chem.Mol]]: for head_or_tail, (resname, rdmol) in zip(['head', 'tail'], self.iter_rdmols(term_only=True)): # zip will bottom out early if fewer than 2 terminal monomers are present term_orient_auto[head_or_tail] = resname # populate purely for logging end_groups_auto[head_or_tail] = (resname, rdmol) - LOGGER.warning(f'No valid terminal monomer orientations defined; auto-assigned orientations 
"{term_orient_auto}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') + LOGGER.warning(f'No valid terminal monomer orientations defined, auto-assigned orientations "{term_orient_auto}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') return end_groups_auto From c991c17fcaebcef7a163932bf8f3ce28a4979e9f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Tue, 10 Dec 2024 17:30:04 -0700 Subject: [PATCH 179/191] Fixed indent on serialize_openmm_pdb() arguments --- polymerist/mdtools/openmmtools/serialization.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/polymerist/mdtools/openmmtools/serialization.py b/polymerist/mdtools/openmmtools/serialization.py index 521e8f0..d29f7cf 100644 --- a/polymerist/mdtools/openmmtools/serialization.py +++ b/polymerist/mdtools/openmmtools/serialization.py @@ -119,8 +119,15 @@ def serialize_system(sys_path : Path, system : System) -> None: file.write(XmlSerializer.serialize(system)) @allow_string_paths -def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : Union[NDArray, list[Vec3]], keep_chain_and_res_ids : bool=True, - uniquify_atom_ids : bool=True, num_atom_id_digits : int=2, resname_map : Optional[dict[str, str]]=None) -> None: +def serialize_openmm_pdb( + pdb_path : Path, + topology : OpenMMTopology, + positions : Union[NDArray, list[Vec3]], + keep_chain_and_res_ids : bool=True, + uniquify_atom_ids : bool=True, + num_atom_id_digits : int=2, + resname_map : Optional[dict[str, str]]=None + ) -> None: '''Configure and write an Protein DataBank File from an OpenMM Topology and array of positions Provides options to configure atom ID numbering, residue numbering, and residue naming''' if resname_map is None: From 040a718ab4e8633d646ed12a80b467d06acc7355 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 13:49:58 -0700 Subject: [PATCH 180/191] Fixed accidental duplication of 3-functional TMC monomer fragment --- 
polymerist/polymers/monomers/fragments.py | 2 +- polymerist/tests/polymers/building/test_linear.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/polymerist/polymers/monomers/fragments.py b/polymerist/polymers/monomers/fragments.py index a563d87..a734e80 100644 --- a/polymerist/polymers/monomers/fragments.py +++ b/polymerist/polymers/monomers/fragments.py @@ -12,7 +12,7 @@ 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], # TMC (trimesoyl chloride) 'TMC-1': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], - 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], + 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], } diff --git a/polymerist/tests/polymers/building/test_linear.py b/polymerist/tests/polymers/building/test_linear.py index f9be153..2c086a9 100644 --- a/polymerist/tests/polymers/building/test_linear.py +++ b/polymerist/tests/polymers/building/test_linear.py @@ -62,7 +62,7 @@ def monogrp_peg_plga() -> MonomerGroup: ) ), pytest.param( - 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 8, 'AB', True, True, False, # will fail due to 
3-functional TMC middle monomer as B + 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 12, 'ABC', True, True, False, # will fail due to 3-functional TMC middle monomer as C marks=pytest.mark.xfail( raises=MorphologyError, reason='One of the monomers requested is non-linear (3-functional)', @@ -74,7 +74,7 @@ def monogrp_peg_plga() -> MonomerGroup: ('monogrp_peg_plga', {}, 17, 'ABC', True, False, False), # test autogen with whole sequence ('monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, True, False), # test more complex sequence with non-default explicit end groups pytest.param( - 'monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, False, False, # will fail due to partial + 'monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, False, False, # will fail due to partial sequence marks=pytest.mark.xfail( raises=PartialBlockSequence, reason='Partial sequence repeat needed to get odd number block out of AB, but partial blocks are disabled', From bbd1b8583f2e5dc1d52819e38dfcf154f51d65c0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 14:53:54 -0700 Subject: [PATCH 181/191] Added new subpackage for molecule file I/O --- polymerist/molfiles/__init__.py | 4 ++ polymerist/molfiles/pdb.py | 72 +++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 polymerist/molfiles/__init__.py create mode 100644 polymerist/molfiles/pdb.py diff --git a/polymerist/molfiles/__init__.py b/polymerist/molfiles/__init__.py new file mode 100644 index 0000000..314438a --- /dev/null +++ b/polymerist/molfiles/__init__.py @@ -0,0 +1,4 @@ +'''Utilities for reading from and writing to various molecular file formats''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' \ No newline at end of file diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py new file mode 100644 index 0000000..d998a0f --- /dev/null +++ b/polymerist/molfiles/pdb.py 
@@ -0,0 +1,72 @@ +'''PDB file formatting tools''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from dataclasses import dataclass, field +from collections import Counter + + +@dataclass +class SerialAtomLabeller: + ''' + For assigning unique numbered atom names based on their + order of appearance within a molecule and elemental class + + Useful, for example, in generating unique atom names for a PDB file + + Parameters + ---------- + atom_label_size : int , default 4 + Exact length alloted for any generated atom label + Labels shorter than this are right-padded with spaces, + while labels longer than this are truncated + + Default of 4 is the chosen to be compatible with the PDB specification ("Atom name: lines 13-16, left-justified") + https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html + include_elem_idx : bool, default True + Whether to attach a numerical element-index postfix to atom labels + + E.g. with atom_label_size=4, the fifth carbon in a topology + will be labelled as "C004" with include_elem_idx=True, + while labelled as "C " with include_elem_idx=False, + default_elem_idx : int, default 0 + Starting index for each element category + By default, is 0-indexed; MUST BE POSITIVE + ''' + atom_label_size : int = 4 + include_elem_idx : bool = True + default_elem_idx : int = 0 + + element_counter : Counter = field(init=False, default_factory=Counter) + + def __post_init__(self) -> None: + '''Check ranges on input values''' + if self.atom_label_size < 0: + raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.num_idx_digits})') + + if self.default_elem_idx < 0: + raise ValueError(f'Must provide a non-negative starting index for element indices (provided {self.default_elem_idx})') + + def get_atom_label(self, elem_symbol : str) -> str: + ''' + Obtain a numbered atom label for an atom based on its element, + updating the underlying element context in the process + 
''' + if elem_symbol not in self.element_counter: # initialize first occurence to starting value + self.element_counter[elem_symbol] = self.default_elem_idx + + atom_idx_label : str = '' + if self.include_elem_idx: + atom_idx = self.element_counter[elem_symbol] + num_idx_digits = max(self.atom_label_size - len(elem_symbol), 0) # number of symbols left over for an atom index + atom_idx_label = f'{atom_idx:0{num_idx_digits}d}' + + atom_name = f'{elem_symbol}{atom_idx_label}' + atom_name = atom_name.ljust(self.atom_label_size, ' ')[:self.atom_label_size] # pad with spaces if too short, or truncate if too long + assert(len(atom_name) <= self.atom_label_size) # perfunctory check to make sure things are working as expected + + self.element_counter[elem_symbol] += 1 # update tally with addition of new occurence of a particular element + + return atom_name + \ No newline at end of file From 34d1a7bf801f1aee1e718197677804b2587cdab4 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 15:53:04 -0700 Subject: [PATCH 182/191] Froze SerialAtomLabeller dataclass to avoid unintentional label format mutation --- polymerist/molfiles/pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py index d998a0f..751d611 100644 --- a/polymerist/molfiles/pdb.py +++ b/polymerist/molfiles/pdb.py @@ -7,7 +7,7 @@ from collections import Counter -@dataclass +@dataclass(frozen=True) class SerialAtomLabeller: ''' For assigning unique numbered atom names based on their From d9afdc49f5a5efcc77ac412dca3be05bfc295192 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 15:53:51 -0700 Subject: [PATCH 183/191] Switched PDB atom labelled to dependency-injection based model --- .../mdtools/openmmtools/serialization.py | 16 ++--- polymerist/polymers/building/mbconvert.py | 71 ++++++++----------- 2 files changed, 36 insertions(+), 51 deletions(-) diff --git a/polymerist/mdtools/openmmtools/serialization.py 
b/polymerist/mdtools/openmmtools/serialization.py index d29f7cf..97348a6 100644 --- a/polymerist/mdtools/openmmtools/serialization.py +++ b/polymerist/mdtools/openmmtools/serialization.py @@ -23,6 +23,7 @@ from ...genutils.fileutils.pathutils import assemble_path from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable from ...genutils.fileutils.jsonio.serialize import PathSerializer +from ...molfiles.pdb import SerialAtomLabeller # DEFINING AND STORING SIMULATION PATHS @@ -124,9 +125,8 @@ def serialize_openmm_pdb( topology : OpenMMTopology, positions : Union[NDArray, list[Vec3]], keep_chain_and_res_ids : bool=True, - uniquify_atom_ids : bool=True, - num_atom_id_digits : int=2, - resname_map : Optional[dict[str, str]]=None + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None, ) -> None: '''Configure and write an Protein DataBank File from an OpenMM Topology and array of positions Provides options to configure atom ID numbering, residue numbering, and residue naming''' @@ -145,13 +145,9 @@ def serialize_openmm_pdb( residue.name = repl_res_name # individual atom config - element_counter = Counter() # for keeping track of the running index of each distinct element - could be used to produce a Hill formula - for atom in topology.atoms(): - symbol = atom.element.symbol - atom_id = element_counter[symbol] - if uniquify_atom_ids: - atom.name = f'{symbol}{atom_id:0{num_atom_id_digits}d}' # extend atom name with ordered integer with specified number of digits (including leading zeros) - element_counter[symbol] += 1 + if atom_labeller: # implicitly, preserves extant atom names if a labeller is not given + for atom in topology.atoms(): + atom.name = atom_labeller.get_atom_label(atom.element.symbol) # file write with pdb_path.open('w') as file: diff --git a/polymerist/polymers/building/mbconvert.py b/polymerist/polymers/building/mbconvert.py index ebef303..8d0be5d 100644 --- 
a/polymerist/polymers/building/mbconvert.py +++ b/polymerist/polymers/building/mbconvert.py @@ -18,11 +18,7 @@ ) from typing import Optional - from pathlib import Path -from collections import Counter - -from rdkit import Chem import warnings with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings @@ -30,8 +26,11 @@ from mbuild import Compound from mbuild.conversion import from_rdkit -from ..monomers.specification import SANITIZE_AS_KEKULE +from rdkit import Chem + from ...genutils.decorators.functional import allow_string_paths, allow_pathlib_paths +from ..monomers.specification import SANITIZE_AS_KEKULE +from ...molfiles.pdb import SerialAtomLabeller from ...rdutils.bonding.portlib import get_linker_ids from ...rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports from ...mdtools.openmmtools.serialization import serialize_openmm_pdb @@ -57,22 +56,24 @@ def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tup return mb_compound, linker_ids # Conversion from Compound to other formats +_DEFAULT_RESNAME_MAP : dict[str, str] = { # module-wide config for default PDB residue name replacements for polymers + 'RES' : 'Pol', +} + def mbmol_to_rdmol( # TODO: deduplify PDB atom name and residue numbering code against serialize_openmm_pdb() mbmol : Compound, - uniquify_atom_ids : bool=False, - num_atom_id_digits : int=2, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), resname_map : Optional[dict[str, str]]=None ) -> Chem.Mol: '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info''' if resname_map is None: - resname_map = {} + resname_map = _DEFAULT_RESNAME_MAP rdmol = mbmol.to_rdkit() conformer = Chem.Conformer() conformer.Set3D(True) atom_id : int = 0 - element_counter = Counter() for resnum, mb_monomer in enumerate(mbmol.children, start=1): resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no 
remapping is found, just take first 3 chars # NOTE: the order of monomers and atoms within those monomers were added in the same order as iterated over here... @@ -82,64 +83,52 @@ def mbmol_to_rdmol( # TODO: deduplify PDB atom name and residue numbering code a # set PDB residue info if monomer hierarchy is present if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide - symbol = mbatom.element.symbol - atom_ser_id = element_counter[symbol] - atom_ser_str = f'{atom_ser_id:0{num_atom_id_digits}d}' if uniquify_atom_ids else ' ' # double space keeps column justification correct when non-unique - atom_name = f' {symbol}{atom_ser_str}' # need a leading space to get column alignment in PDB compliant with spec - pdb_info = Chem.AtomPDBResidueInfo( - atomName=atom_name, + atomName=4*' ' if not atom_labeller else atom_labeller.get_atom_label(mbatom.element.symbol), residueName=resname, residueNumber=resnum, chainId='1', isHeteroAtom=True, ) - element_counter[symbol] += 1 # only increment AFTER prior value has been assigned to the current atom rdmol.GetAtomWithIdx(atom_id).SetPDBResidueInfo(pdb_info) - atom_id += 1 # TODO: this is an awful waay of keeping track of atom indices, see if there's a more secure way to do this - conf_id = rdmol.AddConformer(conformer) + conf_id = rdmol.AddConformer(conformer) # NOTE: recording this to self-document return values (this is intentionally not used) return rdmol # Serialization of Compounds to files +@allow_pathlib_paths +def mbmol_to_rdkit_pdb( + pdb_path : str, + mbmol : Compound, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None, + ) -> None: + '''Save an MBuild Compound into an RDKit-formatted PDB file''' + Chem.MolToPDBFile( + mbmol_to_rdmol(mbmol, atom_labeller=atom_labeller, resname_map=resname_map), + pdb_path, + ) + @allow_string_paths def mbmol_to_openmm_pdb( pdb_path : Path, mbmol : Compound, - 
num_atom_digits : int=2, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), resname_map : Optional[dict[str, str]]=None, ) -> None: '''Save an MBuild Compound into an OpenMM-formatted PDB file''' if resname_map is None: # avoid mutable default - resname_map = {'RES' : 'Pol'} + resname_map = _DEFAULT_RESNAME_MAP traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory + # TODO: add monomer name transfer to PDB residue names serialize_openmm_pdb( pdb_path, topology=omm_top, positions=omm_pos, - uniquify_atom_ids=True, - num_atom_id_digits=num_atom_digits, - resname_map=resname_map - ) - -@allow_pathlib_paths -def mbmol_to_rdkit_pdb( - pdb_path : str, - mbmol : Compound, - num_atom_digits : int=2, - resname_map : Optional[dict[str, str]]=None, - ) -> None: - '''Save an MBuild Compound into an RDKit-formatted PDB file''' - Chem.MolToPDBFile( - mbmol_to_rdmol( - mbmol, - uniquify_atom_ids=True, - num_atom_id_digits=num_atom_digits, - resname_map=resname_map - ), - pdb_path, + atom_labeller=atom_labeller, + resname_map=resname_map, ) \ No newline at end of file From 668c3f96fef9dd2edcfe5b9cbaf5156d75867449 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 16:04:17 -0700 Subject: [PATCH 184/191] Renamed "chain" to "polymer" where it occurs to avoid confusion with the related-but-distinct OpenMM notion of a Chain --- polymerist/polymers/building/linear.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/polymerist/polymers/building/linear.py b/polymerist/polymers/building/linear.py index b1a5fd4..dab50f4 100644 --- a/polymerist/polymers/building/linear.py +++ b/polymerist/polymers/building/linear.py @@ -50,21 +50,21 @@ def build_linear_polymer( sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for 
each appearance of a new, unique symbol in the sequence # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY - chain = MBPolymer() + polymer = MBPolymer() monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building ## 2A) ADD MIDDLE MONOMERS TO CHAIN for symbol, (resname, middle_monomer) in zip(sequence_unique, monomers.iter_rdmols(term_only=False)): # zip with sequence limits number of middle monomers to length of block sequence LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) - chain.add_monomer(compound=mb_monomer, indices=linker_ids) + polymer.add_monomer(compound=mb_monomer, indices=linker_ids) monomers_selected.monomers[resname] = monomers.monomers[resname] ## 2B) ADD TERMINAL MONOMERS TO CHAIN for head_or_tail, (resname, term_monomer) in end_groups.items(): LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) - chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation + polymer.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation monomers_selected.monomers[resname] = monomers.monomers[resname] # 3) ASSEMBLE AND RETURN CHAIN @@ -74,15 +74,15 @@ def build_linear_polymer( n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') - chain.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the 
middle monomers) - for atom in chain.particles(): + polymer.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + for atom in polymer.particles(): atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) - LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {chain.n_particles} atoms)') + LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {polymer.n_particles} atoms)') # 4) OPTIONALLY, PERFORM FINAL UFF ENERGY MINIMIZATION if energy_minimize: LOGGER.info('Energy-minimizing chain to find more stable conformer') - chain.energy_minimize() + polymer.energy_minimize() LOGGER.info('Energy minimization completed') - return chain \ No newline at end of file + return polymer \ No newline at end of file From a5cc442bccfa42597a9da65f6b29f53a1e2bd2dd Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 16:06:52 -0700 Subject: [PATCH 185/191] Added placeholder unit tests for newly-created `molfiles` subpackage --- polymerist/tests/molfiles/__init__.py | 4 ++++ polymerist/tests/molfiles/test_pdb.py | 4 ++++ 2 files changed, 8 insertions(+) create mode 100644 polymerist/tests/molfiles/__init__.py create mode 100644 polymerist/tests/molfiles/test_pdb.py diff --git a/polymerist/tests/molfiles/__init__.py b/polymerist/tests/molfiles/__init__.py new file mode 100644 index 0000000..d92450c --- /dev/null +++ b/polymerist/tests/molfiles/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `molfiles` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/molfiles/test_pdb.py b/polymerist/tests/molfiles/test_pdb.py new file mode 100644 index 0000000..f7795b9 --- /dev/null +++ b/polymerist/tests/molfiles/test_pdb.py @@ -0,0 +1,4 @@ +'''Unit tests for PDB file I/O utils''' + +__author__ = 'Timotej 
Bernat' +__email__ = 'timotej.bernat@colorado.edu' From 4d128e3b50c3fe5c0e60a710462d651cbc24d03f Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 16:51:22 -0700 Subject: [PATCH 186/191] Added residue info injection into mbmol_to_openmm_pdb (PDB outputs are now totally consistent with RDKit PDB output) --- polymerist/polymers/building/mbconvert.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/polymerist/polymers/building/mbconvert.py b/polymerist/polymers/building/mbconvert.py index 8d0be5d..61656d9 100644 --- a/polymerist/polymers/building/mbconvert.py +++ b/polymerist/polymers/building/mbconvert.py @@ -121,9 +121,10 @@ def mbmol_to_openmm_pdb( if resname_map is None: # avoid mutable default resname_map = _DEFAULT_RESNAME_MAP - traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) + # NOTE: converting through MDTraj first before going to OpenMM preserves much + # of the necessary chemical info that is discarded when converting through other formats + traj = mbmol.to_trajectory(residues=[residue.name for residue in mbmol.children]) # extract names of repeat units omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory - # TODO: add monomer name transfer to PDB residue names serialize_openmm_pdb( pdb_path, From f1f8039a5a81e5ab16ac3bd64e5fceecadfdf251 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 17:16:31 -0700 Subject: [PATCH 187/191] Renamed "atom_label_size" to "atom_label_length" for clarity --- polymerist/molfiles/pdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py index 751d611..dc9f592 100644 --- a/polymerist/molfiles/pdb.py +++ b/polymerist/molfiles/pdb.py @@ -17,7 +17,7 @@ class SerialAtomLabeller: Parameters ---------- - atom_label_size : int , default 4 + atom_label_length : int , default 4 
Exact length alloted for any generated atom label Labels shorter than this are right-padded with spaces, while labels longer than this are truncated @@ -27,22 +27,22 @@ class SerialAtomLabeller: include_elem_idx : bool, default True Whether to attach a numerical element-index postfix to atom labels - E.g. with atom_label_size=4, the fifth carbon in a topology + E.g. with atom_label_length=4, the fifth carbon in a topology will be labelled as "C004" with include_elem_idx=True, while labelled as "C " with include_elem_idx=False, default_elem_idx : int, default 0 Starting index for each element category By default, is 0-indexed; MUST BE POSITIVE ''' - atom_label_size : int = 4 - include_elem_idx : bool = True - default_elem_idx : int = 0 + atom_label_length : int = 4 + include_elem_idx : bool = True + default_elem_idx : int = 0 element_counter : Counter = field(init=False, default_factory=Counter) def __post_init__(self) -> None: '''Check ranges on input values''' - if self.atom_label_size < 0: + if self.atom_label_length < 0: raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.num_idx_digits})') if self.default_elem_idx < 0: @@ -59,12 +59,12 @@ def get_atom_label(self, elem_symbol : str) -> str: atom_idx_label : str = '' if self.include_elem_idx: atom_idx = self.element_counter[elem_symbol] - num_idx_digits = max(self.atom_label_size - len(elem_symbol), 0) # number of symbols left over for an atom index + num_idx_digits = max(self.atom_label_length - len(elem_symbol), 0) # number of symbols left over for an atom index atom_idx_label = f'{atom_idx:0{num_idx_digits}d}' atom_name = f'{elem_symbol}{atom_idx_label}' - atom_name = atom_name.ljust(self.atom_label_size, ' ')[:self.atom_label_size] # pad with spaces if too short, or truncate if too long - assert(len(atom_name) <= self.atom_label_size) # perfunctory check to make sure things are working as expected + atom_name = atom_name.ljust(self.atom_label_length, ' 
')[:self.atom_label_length] # pad with spaces if too short, or truncate if too long + assert(len(atom_name) <= self.atom_label_length) # perfunctory check to make sure things are working as expected self.element_counter[elem_symbol] += 1 # update tally with addition of new occurence of a particular element From 370dc3aeb5b913c3fddfc4cc7c6913198c980415 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 17:33:33 -0700 Subject: [PATCH 188/191] Renamed once more to atom_label_width --- polymerist/molfiles/pdb.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py index dc9f592..b0fe63b 100644 --- a/polymerist/molfiles/pdb.py +++ b/polymerist/molfiles/pdb.py @@ -17,7 +17,7 @@ class SerialAtomLabeller: Parameters ---------- - atom_label_length : int , default 4 + atom_label_width : int , default 4 Exact length alloted for any generated atom label Labels shorter than this are right-padded with spaces, while labels longer than this are truncated @@ -27,22 +27,22 @@ class SerialAtomLabeller: include_elem_idx : bool, default True Whether to attach a numerical element-index postfix to atom labels - E.g. with atom_label_length=4, the fifth carbon in a topology + E.g. 
with atom_label_width=4, the fifth carbon in a topology will be labelled as "C004" with include_elem_idx=True, while labelled as "C " with include_elem_idx=False, default_elem_idx : int, default 0 Starting index for each element category By default, is 0-indexed; MUST BE POSITIVE ''' - atom_label_length : int = 4 - include_elem_idx : bool = True - default_elem_idx : int = 0 + atom_label_width : int = 4 + include_elem_idx : bool = True + default_elem_idx : int = 0 element_counter : Counter = field(init=False, default_factory=Counter) def __post_init__(self) -> None: '''Check ranges on input values''' - if self.atom_label_length < 0: + if self.atom_label_width < 0: raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.num_idx_digits})') if self.default_elem_idx < 0: @@ -59,12 +59,12 @@ def get_atom_label(self, elem_symbol : str) -> str: atom_idx_label : str = '' if self.include_elem_idx: atom_idx = self.element_counter[elem_symbol] - num_idx_digits = max(self.atom_label_length - len(elem_symbol), 0) # number of symbols left over for an atom index + num_idx_digits = max(self.atom_label_width - len(elem_symbol), 0) # number of symbols left over for an atom index atom_idx_label = f'{atom_idx:0{num_idx_digits}d}' atom_name = f'{elem_symbol}{atom_idx_label}' - atom_name = atom_name.ljust(self.atom_label_length, ' ')[:self.atom_label_length] # pad with spaces if too short, or truncate if too long - assert(len(atom_name) <= self.atom_label_length) # perfunctory check to make sure things are working as expected + atom_name = atom_name.ljust(self.atom_label_width, ' ')[:self.atom_label_width] # pad with spaces if too short, or truncate if too long + assert(len(atom_name) <= self.atom_label_width) # perfunctory check to make sure things are working as expected self.element_counter[elem_symbol] += 1 # update tally with addition of new occurence of a particular element From c61f16a99e9bb7afeb361c5a641eb163f93dd3c8 Mon Sep 17 00:00:00 
2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 17:41:17 -0700 Subject: [PATCH 189/191] Fixed non-attribute value in atom_label_width Exception message --- polymerist/molfiles/pdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py index b0fe63b..336e382 100644 --- a/polymerist/molfiles/pdb.py +++ b/polymerist/molfiles/pdb.py @@ -43,7 +43,7 @@ class SerialAtomLabeller: def __post_init__(self) -> None: '''Check ranges on input values''' if self.atom_label_width < 0: - raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.num_idx_digits})') + raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.atom_label_width})') if self.default_elem_idx < 0: raise ValueError(f'Must provide a non-negative starting index for element indices (provided {self.default_elem_idx})') From e40600ec158d611cb5d9a06b2d1d4523ec1ac2e0 Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 17:45:17 -0700 Subject: [PATCH 190/191] Added string type check for atom element symbols --- polymerist/molfiles/pdb.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py index 336e382..1852423 100644 --- a/polymerist/molfiles/pdb.py +++ b/polymerist/molfiles/pdb.py @@ -53,6 +53,9 @@ def get_atom_label(self, elem_symbol : str) -> str: Obtain a numbered atom label for an atom based on its element, updating the underlying element context in the process ''' + if not isinstance(elem_symbol, str): + raise TypeError(f'Must pass symbol of atom\'s element as str (not type {type(elem_symbol).__name__})') + if elem_symbol not in self.element_counter: # initialize first occurence to starting value self.element_counter[elem_symbol] = self.default_elem_idx From 5c282d73932cd98090a24894e8f957eae1e7681c Mon Sep 17 00:00:00 2001 From: Timotej Bernat Date: Wed, 11 Dec 2024 17:45:33 -0700 Subject: 
[PATCH 191/191] Wrote unit tests for molfiles.pdb.SerialAtomLabeller --- polymerist/tests/molfiles/test_pdb.py | 58 +++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/polymerist/tests/molfiles/test_pdb.py b/polymerist/tests/molfiles/test_pdb.py index f7795b9..9be9f3a 100644 --- a/polymerist/tests/molfiles/test_pdb.py +++ b/polymerist/tests/molfiles/test_pdb.py @@ -2,3 +2,61 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' + +import pytest +from polymerist.molfiles.pdb import SerialAtomLabeller + + +ELEMS : tuple[str] = ('C', 'H', 'H', 'H', 'N', 'H', 'C', 'O', 'Cl') # atoms for methylcarbamoyl chloride (MCC) + +@pytest.mark.parametrize( + 'mol_atom_elems, atom_label_width, include_elem_idx, default_elem_idx, expected_labels', + [ + (ELEMS, 4, True, 0, ['C000', 'H000', 'H001', 'H002', 'N000', 'H003', 'C001', 'O000', 'Cl00']), # test with default PDB-compatible settings + (ELEMS, 4, True, 2, ['C002', 'H002', 'H003', 'H004', 'N002', 'H005', 'C003', 'O002', 'Cl02']), # test element index offset + (ELEMS, 3, True, 2, ['C02', 'H02', 'H03', 'H04', 'N02', 'H05', 'C03', 'O02', 'Cl2']), # test shorter atom label width + (ELEMS, 1, True, 0, ['C', 'H', 'H', 'H', 'N', 'H', 'C', 'O', 'C']), # test truncation works below threshold where indices can be written + (ELEMS, 4, False, 0, ['C ', 'H ', 'H ', 'H ', 'N ', 'H ', 'C ', 'O ', 'Cl ']), # test without element indices + (ELEMS, 4, False, 7, ['C ', 'H ', 'H ', 'H ', 'N ', 'H ', 'C ', 'O ', 'Cl ']), # test that default indices have no impact when indices aren't present + (ELEMS, 0, False, 0, ['', '', '', '', '', '', '', '', '']), # test null-width labels + # Invalid input handling checks + pytest.param( + ELEMS, -1, True, 0, [], # test that negative label width is rejected as intended + marks=pytest.mark.xfail( + raises=ValueError, + reason='Negative atom label widths not allowed', + strict=True, + ) + ), + pytest.param( + ELEMS, 4, True, -5, [], # test that negative default indices
are rejected as intended + marks=pytest.mark.xfail( + raises=ValueError, + reason='Negative element indices not allowed', + strict=True, + ) + ), + pytest.param( + tuple(len(elem) for elem in ELEMS), 4, True, 0, [], # test that non-string element symbols are rejected as intended + marks=pytest.mark.xfail( + raises=TypeError, + reason='Must pass atom elements as strings', + strict=True, + ) + ), + ] +) +def test_atom_labeller( + mol_atom_elems : tuple[str], + atom_label_width : int, + include_elem_idx : bool, + default_elem_idx : int, + expected_labels : list[str], + ) -> None: + '''Test that atom labelling behaves as expected with various label formatting configurations''' + labeller = SerialAtomLabeller( + atom_label_width=atom_label_width, + include_elem_idx=include_elem_idx, + default_elem_idx=default_elem_idx, + ) + assert [labeller.get_atom_label(elem) for elem in mol_atom_elems] == expected_labels \ No newline at end of file