Skip to content

Commit

Permalink
Merge pull request #135 from lbluque/master
Browse files Browse the repository at this point in the history
More flexible indices from cutoffs in cluster subspace (+ minor fixes)
  • Loading branch information
lbluque authored Sep 2, 2021
2 parents cea2946 + f930fb7 commit 0fdcbad
Show file tree
Hide file tree
Showing 139 changed files with 3,801 additions and 22,271 deletions.
2 changes: 1 addition & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Use this section to keep track of changes in the works.
([lbluque](https://github.com/lbluque))
* `UniformlyRandomKernel` for high temperature/random limit sampling.
`ThermalKernel` ABC class for all temperature based MC Kernels.
[\#133](https://github.com/CederGroupHub/smol/pull/134)
[\#134](https://github.com/CederGroupHub/smol/pull/134)
([lbluque](https://github.com/lbluque))

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion docs/build/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 16a90369e52b5ddb1fd305bb17ac2d09
config: 69cc6bb721c10078b22cbedaabcaf519
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file modified docs/build/.doctrees/api.doctree
Binary file not shown.
Binary file modified docs/build/.doctrees/environment.pickle
Binary file not shown.
Binary file modified docs/build/.doctrees/examples.doctree
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/.doctrees/notebooks/1-creating-a-ce.doctree
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/.doctrees/notebooks/2-running-canonical-mc.doctree
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/.doctrees/notebooks/basis-orthogonalization.doctree
Binary file not shown.
Binary file not shown.
Binary file modified docs/build/.doctrees/smol.cofe.doctree
Binary file not shown.
Binary file modified docs/build/.doctrees/smol.cofe.space.doctree
Binary file not shown.
2 changes: 1 addition & 1 deletion docs/build/_sources/examples.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Basic Examples

.. _Running Semi-Grand Canonical Monte Carlo: notebooks/2-1-running-semigrand-mc.ipynb

.. _Preparing cluster expansion training data: notebooks/3-training-data-preparations.ipynb
.. _Preparing cluster expansion training data: notebooks/3-training-data-preparation.ipynb

.. _Adding structures to a StructureWrangler in parallel: notebooks/4-adding-structures-in-parallel.ipynb

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,6 @@
"# and the expansion have it, there is no need to do so.\n",
"save_work(file_path, wrangler, expansion)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
17 changes: 5 additions & 12 deletions docs/build/_sources/notebooks/1-creating-a-ce.ipynb.txt
Original file line number Diff line number Diff line change
Expand Up @@ -314,14 +314,14 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The predicted energy for a structure with composition Li+5 Ni4+1 Ni3+5 O2-12 is -36.46654400197699 eV/prim.\n",
"The predicted energy for a structure with composition Li+2 Ni4+4 Ni3+2 O2-12 is -33.42762309783674 eV/prim.\n",
"\n",
"The fitted coefficients are:\n",
"[-3.44424307e+01 1.52944807e+00 1.52944807e+00 -7.11937730e-02\n",
Expand Down Expand Up @@ -376,7 +376,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_70367/663351370.py:10: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
"/tmp/ipykernel_302279/663351370.py:10: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray.\n",
" structure = np.random.choice(wrangler.structures)\n"
]
}
Expand Down Expand Up @@ -413,7 +413,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -434,7 +434,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -455,13 +455,6 @@
"for name, obj in work.items():\n",
" print(f'{name}: {type(obj)}\\n')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
51 changes: 29 additions & 22 deletions docs/build/_sources/notebooks/2-1-running-semigrand-mc.ipynb.txt

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions docs/build/_sources/notebooks/2-running-canonical-mc.ipynb.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Sampling information: {'name': 'CanonicalEnsemble', 'kernel': 'Metropolis', 'step': 'swap', 'seed': 16199914525253226741}\n"
"Sampling information: {'name': 'CanonicalEnsemble', 'kernel': 'Metropolis', 'step': 'swap', 'seed': 6449828449790598858}\n"
]
}
],
Expand Down Expand Up @@ -211,7 +211,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 1 chain(s) at 1500.00 K from a cell with 64 sites: 100%|██████████| 1000000/1000000 [01:50<00:00, 9069.45it/s]\n"
"Sampling 1 chain(s) from a cell with 64 sites...: 100%|████████████| 1000000/1000000 [01:43<00:00, 9691.60it/s]\n"
]
}
],
Expand All @@ -233,9 +233,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Fraction of succesfull steps (efficiency) 0.390277\n",
"The last step energy is -551.4509466160619 eV\n",
"The minimum energy in trajectory is -552.6314360816017 eV\n"
"Fraction of succesfull steps (efficiency) 0.388293\n",
"The last step energy is -552.04643792069 eV\n",
"The minimum energy in trajectory is -552.6314360816021 eV\n"
]
}
],
Expand Down Expand Up @@ -303,8 +303,8 @@
"text": [
"A total of 10000 samples taken.\n",
"A total of 9000 samples used for production.\n",
"The average energy is -551.7860869325897 eV\n",
"The energy variance is 0.04600543434696906 eV^2\n"
"The average energy is -551.7892375412837 eV\n",
"The energy variance is 0.04837940701855058 eV^2\n"
]
}
],
Expand Down Expand Up @@ -363,7 +363,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.9.5"
}
},
"nbformat": 4,
Expand Down
100 changes: 59 additions & 41 deletions docs/build/_sources/notebooks/3-training-data-preparation.ipynb.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,14 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/lbluque/Develop/pymatgen/pymatgen/ext/matproj.py:454: DeprecationWarning: __init__ is deprecated\n",
"MaterialsProjectCompatibility will be updated with new correction classes as well as new values of corrections and uncertainties in 2020\n",
" def get_pourbaix_entries(self, chemsys, solid_compat=MaterialsProjectCompatibility()):\n"
]
}
],
"outputs": [],
"source": [
"import numpy as np\n",
"import json\n",
"from monty.serialization import loadfn\n",
"from pymatgen.core.structure import Structure\n",
"from smol.cofe import ClusterSubspace, StructureWrangler\n",
"from smol.cofe.configspace import get_specie"
"from smol.cofe.space import get_species"
]
},
{
Expand All @@ -46,6 +36,24 @@
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/lbluque/Develop/smol/smol/cofe/wrangling/wrangler.py:631: UserWarning: Unable to match Ni4+6 O2-12 with properties {'total_energy': -188.28833} to supercell_structure. Throwing out.\n",
" Error Message: Supercell could not be found from structure\n",
" warnings.warn(\n",
"/home/lbluque/Develop/smol/smol/cofe/wrangling/wrangler.py:631: UserWarning: Unable to match Li+2 Ni4+4 Ni3+2 O2-12 with properties {'total_energy': -200.13866} to supercell_structure. Throwing out.\n",
" Error Message: Mapping could not be found from structure.\n",
" warnings.warn(\n",
"/home/lbluque/Develop/smol/smol/cofe/wrangling/wrangler.py:631: UserWarning: Unable to match Li+2 Ni3+2 Ni4+4 O2-12 with properties {'total_energy': -200.42049} to supercell_structure. Throwing out.\n",
" Error Message: Mapping could not be found from structure.\n",
" warnings.warn(\n",
"/home/lbluque/Develop/smol/smol/cofe/wrangling/wrangler.py:631: UserWarning: Unable to match Li+3 Ni4+4 Ni2+1 Ni3+1 O2-12 with properties {'total_energy': -206.70884} to supercell_structure. Throwing out.\n",
" Error Message: Supercell could not be found from structure\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
Expand Down Expand Up @@ -151,8 +159,8 @@
"concentration = []\n",
"for energy, occu in zip(wrangler.get_property_vector('total_energy'),\n",
" wrangler.occupancy_strings):\n",
" n_Li = sum(sp == get_specie('Li+') for sp in occu)\n",
" n_vac = sum(sp == get_specie('Vacancy') for sp in occu)\n",
" n_Li = sum(sp == get_species('Li+') for sp in occu)\n",
" n_vac = sum(sp == get_species('Vacancy') for sp in occu)\n",
" c_Li = n_Li/(n_Li + n_vac)\n",
" mix_en = energy - c_Li*e_LiNiO2 - (1 - c_Li)*e_Ni2O3\n",
" concentration.append(c_Li)\n",
Expand All @@ -178,11 +186,11 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from smol.cofe import weights_energy_above_hull, weights_energy_above_composition\n",
"from smol.cofe.wrangling import weights_energy_above_hull, weights_energy_above_composition\n",
"\n",
"above_compostion = weights_energy_above_composition(wrangler.structures,\n",
" wrangler.get_property_vector('total_energy', normalize=False),\n",
Expand Down Expand Up @@ -211,55 +219,65 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### 4) Filtering structures\n",
"The `StructureWrangler` class can also be used to filter structures to use for a fit based on some criteria.\n",
"### 4) Structure Selection\n",
"The `StructureWrangler` class can also be used to 'filter' structures to use for a fit based on some criteria. To do so we obtain the indices of all structures that satisfy some filtering criteria.\n",
"\n",
"Currently only a filter by maximum ewald energy is implemented as part of the class.\n",
"\n",
"Going forward more filtering options can be implemented as people write and use different methods of filtering functions."
"For example, here we will obtain all the structures with electrostatic energy below a given cutoff."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-6-aea799c1d123>:6: DeprecationWarning: the filter_by_ewald method is going to be deprecated.\n",
"The functionality will still be available but with a different interface\n",
" wrangler.filter_by_ewald(max_ewald=2)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Kept 26/27 structures with Ewald energies < 2 eV/prim.\n",
"The filters are saved as [{'Ewald': {'max_ewald': 2, 'nstructs_removed': 1, 'nstructs_total': 27}}]\n"
"Included 26/27 structures with Ewald energies < 2 eV/prim.\n",
"Saved indices are ['max_ewald_2']\n"
]
}
],
"source": [
"# filter by maximum ewald energy\n",
"# all structures with ewald energy above the cutoff\n",
"# will be removed\n",
"from smol.cofe.wrangling import max_ewald_energy_indices\n",
"\n",
"n_structs_before = wrangler.num_structures\n",
"wrangler.filter_by_ewald(max_ewald=2)\n",
"# get the structure indices\n",
"indices = max_ewald_energy_indices(wrangler,\n",
" max_relative_energy=2)\n",
"# save them in the structure wrangler\n",
"wrangler.add_data_indices('max_ewald_2', indices)\n",
"\n",
"print(f'Kept {wrangler.num_structures}/{n_structs_before} structures with Ewald energies < 2 eV/prim.')\n",
"print(f\"The filters are saved as {wrangler.metadata['applied_filters']}\")"
"print(f'Included {len(indices)}/{wrangler.num_structures} structures with Ewald energies < 2 eV/prim.')\n",
"print(f'Saved indices are {wrangler.available_indices}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature matrix shape: (26, 11)\n",
"Property vector shape (26,)\n"
]
}
],
"source": [
"# you can use the indices for selected structures to\n",
"# obtain only the corresponding values for those structures\n",
"feature_matrix = wrangler.feature_matrix[indices]\n",
"prop_vector = wrangler.get_property_vector('total_energy')[indices]\n",
"\n",
"print(f'Feature matrix shape: {feature_matrix.shape}')\n",
"print(f'Property vector shape {prop_vector.shape}')"
]
}
],
"metadata": {
Expand All @@ -278,7 +296,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
"version": "3.9.5"
}
},
"nbformat": 4,
Expand Down
Loading

0 comments on commit 0fdcbad

Please sign in to comment.