diff --git a/pyxtal/molecular_crystal.py b/pyxtal/molecular_crystal.py index a5f6646e..5270a1df 100644 --- a/pyxtal/molecular_crystal.py +++ b/pyxtal/molecular_crystal.py @@ -126,7 +126,7 @@ def __init__( compat, self.degrees = self.group.check_compatible(self.numMols, self.valid_orientations) if not compat: - msg = f"Compoisition {self.numMols} not compatible with symmetry {self.group.number}" + msg = f"Inincompatible compoisition {self.numMols} with symmetry {self.group.number}" raise Comp_CompatibilityError(msg) self.set_volume() @@ -151,13 +151,24 @@ def __repr__(self): def set_sites(self, sites): """ - initialize Wyckoff sites + Initialize and store symmetry sites for each molecule. + + This function processes a list of symmetry sites, validates them against the expected + number of molecules, and stores them in the `self.sites` dictionary. Each entry in the + `sites` list can be either a dictionary or another type (list, etc.), and if no valid + site is provided for a molecule, `None` is assigned for that molecule's site. Args: - sites: list + sites (list): A list of sites corresponding to `self.molecules`. They can be: + - A dictionary of site information (keys represent Wyckoff letters or + other identifiers, and values are the corresponding information). + - A list or other type representing site information. + - None, if no symmetry site information is available for that molecule. """ - # Symmetry sites + # Initialize the self.sites dictionary to store site information self.sites = {} + + # Iterate over the molecules and their corresponding sites for i, _mol in enumerate(self.molecules): if sites is not None and sites[i] is not None and len(sites[i]) > 0: self._check_consistency(sites[i], self.numMols[i]) @@ -172,22 +183,40 @@ def set_sites(self, sites): def set_molecules(self, molecules, torsions): """ - Get molecular information + Initialize and store molecular information. 
+ + This function processes a list of molecules and initializes each one + as a `pyxtal_molecule` object. If torsions are provided, they are + applied to the respective molecules during initialization. + It stores information in the `self.molecules` attribute. Args: - molecules: list of molecules - torsions: list of torsions + molecules (list): A list of molecules, where each entry can either be: + - A SMILES string or file path representing a molecule. + - An already-initialized `pyxtal_molecule` object. + torsions (list): A list of torsion angles. The length of `torsions` + must be equal to the length of `molecules`, or None + can be provided if no torsions are needed. """ + + # If no torsions are provided, initialize with None for each molecule if torsions is None: torsions = [None] * len(molecules) + # Initialize the molecules list to store processed pyxtal_molecule objects self.molecules = [] + + # Iterate over the molecules for i, mol in enumerate(molecules): # already a pyxtal_molecule object if isinstance(mol, pyxtal_molecule): p_mol = mol else: - p_mol = pyxtal_molecule(mol, seed=self.seed, torsions=torsions[i], tm=self.tol_matrix, random_state=self.random_state) + p_mol = pyxtal_molecule(mol, + seed=self.seed, + torsions=torsions[i], + tm=self.tol_matrix, + random_state=self.random_state) self.molecules.append(p_mol) def set_orientations(self): @@ -427,56 +456,93 @@ def _set_mol_wyckoffs(self, id, numMol, pyxtal_mol, valid_ori, mol_wyks): def _set_orientation(self, pyxtal_mol, pt, oris, wp): """ - Generate good orientations + Generate valid orientations for a given molecule in a Wyckoff position. + + It tries to generate valid orientations for the molecule by: + - Selecting a random orientation from a list of possible orientations. + - Flipping the orientation to test different alignments. + - Checking the smallest distance between atoms and ensuring it's valid. + - Using the bisection method to refine the orientation.
+ + Args: + pyxtal_mol: The pyxtal_molecule object representing the molecule. + pt: Position of the molecule. + oris: List of potential orientations. + wp: Wyckoff position object representing the symmetry of the site. + + Returns: + ms0: A valid `mol_site` object if an acceptable orientation is found. + returns `None` if no valid orientation is found within the attempts. """ - # Use a Wyckoff_site object for the current site + + # Increment the number of attempts to generate a valid orientation self.numattempts += 1 + # NOTE removing this copy causes tests to fail -> state not managed well ori = self.random_state.choice(oris).copy() ori.change_orientation(flip=True) + + # Create a mol_site object with the current orientation ms0 = mol_site(pyxtal_mol, pt, ori, wp, self.lattice) - # Check distances within the WP + + # Check if the current orientation results in valid distances if ms0.short_dist(): return ms0 else: - # Maximize the smallest distance for the general - # positions if needed + # Maximize the separation if needed if len(pyxtal_mol.mol) > 1 and ori.degrees > 0: - # bisection method + # Define the distance function for bisection method def fun_dist(angle, ori, mo, pt): # ori0 = ori.copy() ori.change_orientation(angle) - ms0 = mol_site( - mo, - pt, - ori, - wp, - self.lattice, - ) + ms0 = mol_site(mo, pt, ori, wp, self.lattice) return ms0.get_min_dist() + # Set initial bounds for the angle angle_lo = ori.angle angle_hi = angle_lo + np.pi fun_lo = fun_dist(angle_lo, ori, pyxtal_mol, pt) fun_hi = fun_dist(angle_hi, ori, pyxtal_mol, pt) - fun = fun_hi + + fun = fun_hi # Set the initial value for the function + + # Refine the orientation using a bisection method for _it in range(self.ori_attempts): self.numattempts += 1 + + # Return as soon as a good orientation is found if (fun > 0.8) & (ms0.short_dist()): return ms0 + + # Compute the midpoint angle for bisection angle = (angle_lo + angle_hi) / 2 fun = fun_dist(angle, ori, pyxtal_mol, pt) - # 
print('Bisection: ', it, fun) + + # Update based on the function value at the midpoint if fun_lo > fun_hi: angle_hi, fun_hi = angle, fun else: angle_lo, fun_lo = angle, fun - return None - def _check_consistency(self, site, numMol): """ - Check if the composition is consistent with symmetry + Check if N_mol is consistent with the symmetry constraints of the system. + + It verifies if the sum of molecules from the WP matches (`numMol`). + Each Wyckoff site string in the `site` list includes a number that + represents how many molecules are associated with that site. + + If an inconsistency is found, it raises a ValueError with a detailed message. + + Args: + site (list of str): A list of strings for Wyckoff sites. Each string + contains a number (e.g., "3a", "4b") where the number + indicates how many molecules are at that site. + numMol (int): The total number of molecules expected in the structure. + + Returns: + bool: Returns `True` if the number of molecules matches `numMol`. + Raises a ValueError if they do not match. """ num = 0 for s in site: diff --git a/pyxtal/molecule.py b/pyxtal/molecule.py index c9d9c582..51b67ce1 100644 --- a/pyxtal/molecule.py +++ b/pyxtal/molecule.py @@ -31,18 +31,37 @@ molecule_collection = Collection("molecules") -# single_smiles = [ -# "Cl-", "F-", "Br-", "I-", "Li+", "Na+", "Cs+", "Rb+", -# "[Cl-]", "[F-]", "[Br-]", "[I-]", "[Li+]", "[Na+]", "[Cs+]", "Rb+", -# ] def find_rotor_from_smile(smile): """ - Find the positions of rotatable bonds in the molecule. + Find the positions of rotatable bonds based on a SMILES string. + + Rotatable bonds are those which are not part of rings and which + fit specific chemical patterns. These torsions are filtered by + rules such as avoiding atoms with only one neighbor and avoiding + equivalent torsions. + + Args: + smile (str): The SMILES string representing the molecule.
+ + Returns: + list of tuples: Each tuple represents a torsion as (i, j, k, l) + where i-j-k-l are atom indices involved in the rotatable bond. + """ def cleaner(list_to_clean, neighbors): """ - Remove duplicate torsion from a list of atom index tuples. + Remove duplicate and invalid torsions from a list of atom index tuples. + + Filters torsions based on the neighbors count for the atoms involved in the torsion. + This avoids torsions that involve terminal atoms and duplicates. + + Args: + list_to_clean (list of tuples): List of torsions (i, j, k, l) + neighbors (list of int): List of neighbors for each atom in the molecule. + + Returns: + list of tuples: Cleaned list of torsions. """ for_remove = [] @@ -54,9 +73,12 @@ def cleaner(list_to_clean, neighbors): # for i-j-k-l, we don't want i, l are the ending members # C-C-S=O is not a good choice since O is only 1-coordinated # C-C-NO2 is a good choice since O is only 1-coordinated + + # Remove torsions that involve terminal atoms with only one neighbor if neighbors[ix0] == 1 and neighbors[ix1] == 2 or neighbors[ix3] == 1 and neighbors[ix2] == 2: for_remove.append(x) else: + # Remove duplicate torsions that are equivalent for y in reversed(range(x)): ix1 = itemgetter(1)(list_to_clean[x]) ix2 = itemgetter(2)(list_to_clean[x]) @@ -75,6 +97,7 @@ def cleaner(list_to_clean, neighbors): else: from rdkit import Chem + # SMARTS patterns to identify rotatable bonds and double bonds smarts_torsion1 = "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" smarts_torsion2 = "[*]~[^2]=[^2]~[*]" # C=C bonds # smarts_torsion2="[*]~[^1]#[^1]~[*]" # C-C triples bonds, to be fixed @@ -89,7 +112,11 @@ def cleaner(list_to_clean, neighbors): torsion1 = cleaner(list(mol.GetSubstructMatches(patn_tor1)), neighbors) patn_tor2 = Chem.MolFromSmarts(smarts_torsion2) torsion2 = cleaner(list(mol.GetSubstructMatches(patn_tor2)), neighbors) + + # Combine and clean torsions tmp = cleaner(torsion1 + torsion2, neighbors) + + # Exclude torsions that are part of 
rings torsions = [] for t in tmp: (i, j, k, l) = t @@ -103,13 +130,13 @@ def cleaner(list_to_clean, neighbors): def has_non_aromatic_ring(smiles): """ Determine if a molecule has a non-aromatic ring. - Mainly used to check if a cyclic ring exists. + It checks if a cyclic ring system exists that is not aromatic. Args: - smiles: smiles string + smiles (str): A SMILES string representing the molecule. Returns: - True or False + bool: True if it contains a non-aromatic ring, False otherwise. """ from rdkit import Chem @@ -201,24 +228,33 @@ def get_conformers(smile, seed): class pyxtal_molecule: """ - Extended molecule class based on pymatgen.core.structure.Molecule - The added features include: - 0, parse the input - 1, estimate volume/tolerance/radii - 2, find and store symmetry - 3, get the principle axis - 4, re-align the molecule - - The molecule is always centered at (0, 0, 0). - - If the smile format is used, the center is defined as in + A molecule class to support the description of molecules in a crystal + + Features: + 0. Parse the input from different formats (SMILES, xyz, gjf, etc.). + 1. Estimate molecular properties such as volume, tolerance, and radii. + 2. Find and store symmetry information of the molecule. + 3. Get the principal axis of the molecule. + 4. Re-align the molecule to center it at (0, 0, 0). + + SMILES Format: + If a SMILES format is used, the molecular center is defined following + RDKit's handling of molecular transformations: https://www.rdkit.org/docs/source/rdkit.Chem.rdMolTransforms.html Otherwise, the center is just the mean of atomic positions Args: - mol: a string to reprent the molecule - tm: tolerance matrix + mol (str or pymatgen.Molecule): The molecule representation, either as a string + (SMILES or filename) or as a pymatgen `Molecule` object. + tm (Tol_matrix, optional): A tolerance matrix object, used for molecular tolerances. + symmetrize (bool, optional): Whether to symmetrize the molecule using its point group.
+ fix (bool, optional): Fix torsions in the molecule. + torsions (list, optional): List of torsions to analyze or fix. + seed (int, optional): Random seed for internal processes. Defaults to a hex seed. + random_state (int or numpy.Generator, optional): Numpy random state for random number generation. + symtol (float, optional): Symmetry tolerance. Default is 0.3. + active_sites (list, optional): List of active sites within the molecule. """ def list_molecules(): @@ -237,7 +273,9 @@ def __init__( random_state=None, tm=Tol_matrix(prototype="molecular"), symtol=0.3, + active_sites=None, ): + mo = None self.smile = None self.torsionlist = [] #None @@ -246,6 +284,9 @@ def __init__( seed = 0xF00D self.seed = seed + # Active sites is a list of two tuples [(donors), (acceptors)] + self.active_sites = active_sites + if isinstance(random_state, Generator): self.random_state = random_state.spawn(1)[0] else: @@ -297,6 +338,7 @@ def __init__( self.mol = mo self.get_symmetry() + # Additional molecular properties self.tm = tm self.box = self.get_box() self.volume = self.box.volume @@ -1595,31 +1637,33 @@ def load_dict(cls, dicts): def change_orientation(self, angle="random", flip=False): """ - Allows for specification of an angle (possibly random) to rotate about - the constraint axis. + Change the orientation of the molecule by applying a rotation. - Args: - angle: an angle to rotate about the constraint axis. - If "random", chooses a random rotation angle. - If self.degrees==2, chooses a random rotation matrix. - If self.degrees==1, only apply on angle - If self.degrees==0, no change + It allows for specification of an angle (or a random angle) to rotate about + the constraint axis. If the system has 2 degrees of rotational freedom, + the molecule can also be flipped with a probability of 0.5. + Args: + angle (float or str, optional): The angle to rotate about the constraint axis. + If "random", a random rotation angle is selected + flip (bool, optional): Whether to apply a random flip.
This is only applied + if the system has 2 degrees of rotational freedom. """ if self.degrees >= 1: - # choose the axis + # Choose the axis if self.axis is None: axis = self.random_state.random(3) - 0.5 self.axis = axis / np.linalg.norm(axis) - # parse the angle + # Parse the angle if angle == "random": angle = self.random_state.random() * np.pi * 2 self.angle = angle - # update the matrix + # Update the matrix r1 = Rotation.from_rotvec(self.angle * self.axis) + # Optionally flip the molecule if self.degrees == 2 and flip and self.random_state.random() > 0.5: ax = self.random_state.choice(["x", "y", "z"]) angle0 = self.random_state.choice([90, 180, 270]) diff --git a/pyxtal/wyckoff_site.py b/pyxtal/wyckoff_site.py index 84b72515..40a7b423 100644 --- a/pyxtal/wyckoff_site.py +++ b/pyxtal/wyckoff_site.py @@ -4,7 +4,6 @@ # Standard Libraries from copy import deepcopy - import numpy as np # External Libraries @@ -22,8 +21,6 @@ filtered_coords, ) from pyxtal.symmetry import Group, Wyckoff_position - -# PyXtal imports from pyxtal.tolerance import Tol_matrix @@ -359,17 +356,35 @@ def to_mol_site(self, lattice, molecule, ori=None, reflect=False, type_id=0): class mol_site: """ Class for storing molecular Wyckoff positions and orientations within - the molecular_crystal class. Each mol_site object represenents an + the `molecular_crystal` class. Each `mol_site` object represents an entire Wyckoff position, not necessarily a single molecule. - This is the molecular version of Wyckoff_site + This is the molecular version of the `Wyckoff_site` class. + + Attributes: + mol (pyxtal_molecule): A pyxtal_molecule object representing the molecule at the site. + position (list or array): A 3-vector representing the generating molecule's position + in fractional coordinates. + orientation (Orientation): An orientation object that describes the molecule's orientation. + wp (Wyckoff_position): A Wyckoff position object that holds symmetry information. 
+ lattice (Lattice): A lattice object that defines the crystal structure's unit cell. + stype (int): An integer specifying the type of molecule. Default is 0. + symbols (list): List of atomic symbols for the atoms in the molecule. + numbers (list): List of atomic numbers for the atoms in the molecule. + PBC (list): Periodic boundary conditions inherited from the Wyckoff position. + radius (float): Radius of the molecule, typically used for collision detection. + tols_matrix (numpy array): Tolerance matrix for the molecular structure. Args: - mol: a `pyxtal_molecule `_ object - position: the 3-vector representing the generating molecule's position - orientation: an `Orientation `_ object - wp: a `Wyckoff_position `_ object - lattice: a `Lattice `_ object - stype: integer number to specify the type of molecule + mol (pyxtal_molecule): A `pyxtal_molecule` object that describes the molecule. + position (list or array): The 3D fractional coordinates of mol_center in the unit cell. + orientation (Orientation): The orientation object describing the molecule's rotation. + wp (Wyckoff_position): A `Wyckoff_position` object defining the symmetry of the site. + lattice (Lattice, optional): The lattice of the crystal. Can be either a Lattice object + or a matrix that will be converted into a Lattice object. + stype (int, optional): Integer specifying the type of molecule. Default is 0. + + Methods: + _get_dof(): Internal method to calculate the degrees of freedom (DoF) for the molecule. """ def __init__(self, mol, position, orientation, wp, lattice=None, stype=0): @@ -605,11 +620,6 @@ def _get_coords_and_species(self, absolute=False, PBC=False, first=False, unitce rot = op2_m.affine_matrix[:3, :3].T # NOTE=====the euclidean_generator has wrong translation vectors, # but we don't care. 
This needs to be fixed later - - # if self.diag and self.wp.index > 0: - # tau = op2.translation_vector - # else: - # tau = op2_m.translation_vector tmp = np.dot(coord0, rot) # + tau # Add absolute center to molecule @@ -887,23 +897,26 @@ def get_ijk_range(pbc, abc_val, ignore, radius): def get_distances(self, coord1, coord2, m2=None, center=True, ignore=False): """ - Compute the distance matrix between the center molecule (m1 length) and - neighbors (m2 length) within the PBC consideration (pbc) + Compute the distance matrix between the central molecule (coord1) and + neighboring molecules (coord2) under the periodic boundary condition. Args: - coord1: fractional coordinates of the center molecule - coord2: fractional coordinates of the reference neighbors - m2: the length of reference molecule - center: whether or not consider the self image for coord2 - ignore: + coord1 (numpy array): Fractional coordinates of the central molecule. + Shape: (m1, 3), where m1 is the number of atoms + coord2 (numpy array): Fractional coordinates of the neighboring molecules. + Shape: (N2*m2, 3), where N2 is the number of neighboring molecules + and m2 is the number of atoms in each neighboring molecule. + m2 (int, optional): N_atoms in each neighboring molecule. If not provided, + it's assumed to be equal to m1. + center (bool, optional): If `True`, count self-image of the reference molecule + ignore (bool, optional): If `True`, ignores some periodic boundary conditions.
Returns: distance matrix: [m1*m2*pbc, m1, m2] coord2 under PBC: [pbc, m2, 3] """ m1 = len(coord1) - if m2 is None: - m2 = m1 + if m2 is None: m2 = m1 N2 = int(len(coord2) / m2) # peridoic images @@ -932,7 +945,7 @@ def get_dists_auto(self, ignore=False): def get_dists_WP(self, ignore=False, idx=None): """ - Compute the distances within the WP sites + Compute the distances within the WP site Returns: a distance matrix (M, N, N) @@ -1035,25 +1048,46 @@ def short_dist_with_wp2(self, wp2, tm=Tol_matrix(prototype="molecular")): def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, etol=-5e-2): """ - Find the neigboring molecules + Find neighboring molecules around the central molecule within a given distance threshold. + + The function identifies neighboring molecules within PBC and computes the shortest + distances and (optionally) interaction energies. it returns detailed information + about neighboring molecule pairs, distances, and energies. Args: - factor: volume factor - max_d: maximum intermolecular distance - ignore_E: - detail: show detailed energies + factor (float, optional): Scaling factor for distance tolerances (default is 1.1). + max_d (float, optional): Maximum allowed intermolecular distance for neighbors (default is 4.0 Å). + ignore_E (bool, optional): If `True`, skips energy calculations (default is `True`). + detail (bool, optional): If `True`, returns detailed energy, molecular pairs, and distances + instead of just the shortest distances. + etol (float, optional): Energy tolerance for filtering pairs in detailed mode (default is -5e-2). - Returns - min_ds: list of shortest distances - neighs: list of neighboring molecular xyzs - """ - mol_center = np.dot( - self.position - np.floor(self.position), self.lattice.matrix) + Returns: + If `detail == True`: + engs (list): List of interaction energies for valid molecular pairs. + pairs (list): List of tuples containing neighboring molecules and their relative positions. 
+ dists (list): List of distances between neighboring molecular pairs. + If `detail == False`: + min_ds (list): List of shortest distances between the central molecule and neighbors. + neighs (list): List of neighboring molecular coordinates (with PBC applied). + Ps (list): List of 0/1 flags per neighbor: 0 for a periodic image or pure translation, 1 otherwise. + engs (list): List of interaction energies, or `None` if energy calculation is skipped. + """ + # Compute the mol_center in Cartesian coordinates + position = self.position - np.floor(self.position) + mol_center = np.dot(position, self.lattice.matrix) + + # Atomic numbers for atoms in the central molecule numbers = self.molecule.mol.atomic_numbers + + # Get fractional coordinates for the central molecule coord1, _ = self._get_coords_and_species(first=True, unitcell=True) + + # Initialize tolerance matrix for intermolecular distances (based on van der Waals radii) tm = Tol_matrix(prototype="vdW", factor=factor) - len(self.numbers) tols_matrix = self.molecule.get_tols_matrix(tm=tm) + + # Initialize coefficient matrix for energy calculations if needed coef_matrix = None if not ignore_E: coef_matrix = self.molecule.get_coefs_matrix() @@ -1062,6 +1096,7 @@ def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, B = coef_matrix[:, :, 1] C = coef_matrix[:, :, 2] + # Initialize lists for results min_ds = [] neighs = [] Ps = [] @@ -1069,19 +1104,16 @@ def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, pairs = [] dists = [] - # Check periodic images + # Find neighbors under PBC d, coord2 = self.get_dists_auto(ignore=True) + + # Loop through each neighboring molecule for i in range(d.shape[0]): if np.min(d[i]) < max_d and (d[i] < tols_matrix).any(): if coef_matrix is not None: if detail: eng = A * np.exp(-B * d[i]) - C / (d[i] ** 6) ids = np.where(eng < etol) - # for id in zip(*ids): - # tmp1, tmp2 = coord1[id[0]], coord2[i][id[1]] - # pairs.append((tmp1+tmp2)/2) - # engs.append(eng[id]) - #
dists.append(d[i][id]) for id in range(len(ids[0])): n1, n2 = numbers[ids[0][id]], numbers[ids[1][id]] if 1 not in [n1, n2]: @@ -1111,6 +1143,7 @@ def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, neighs.append(coord2[i]) Ps.append(0) + # Handle Wyckoff position multiplicities (if applicable) if self.wp.multiplicity > 1: for idx in range(1, self.wp.multiplicity): P = 0 if self.wp.is_pure_translation(idx) else 1 @@ -1121,11 +1154,6 @@ def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, if detail: eng = A * np.exp(-B * d[i]) - C / (d[i] ** 6) ids = np.where(eng < etol) - # for id in zip(*ids): - # tmp1, tmp2 = coord1[id[0]], coord2[i][id[1]] - # pairs.append((tmp1+tmp2)/2) - # engs.append(eng[id]) - # dists.append(d[i][id]) for id in range(len(ids[0])): n1, n2 = numbers[ids[0][id] ], numbers[ids[1][id]] @@ -1163,6 +1191,8 @@ def get_neighbors_auto(self, factor=1.1, max_d=4.0, ignore_E=True, detail=False, min_ds.append(min(_d) * factor) neighs.append(coord2[i]) Ps.append(P) + + # Return results based on the detail flag if detail: return engs, pairs, dists else: