Skip to content

Commit

Permalink
Species parse oxi state from symbol str (materialsproject#2998)
Browse files Browse the repository at this point in the history
* Species parse oxi_state from symbol str

* var names to snake_case

* add test_symbol_oxi_state_str()
  • Loading branch information
janosh authored May 18, 2023
1 parent 2b47486 commit ad8e6e7
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 53 deletions.
12 changes: 6 additions & 6 deletions dev_scripts/update_pt_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,17 @@ def parse_oxi_state():
with open("periodic_table.yaml") as f:
data = yaml.load(f)
with open("oxidation_states.txt") as f:
oxidata = f.read()
oxidata = re.sub("[\n\r]", "", oxidata)
oxi_data = f.read()
oxi_data = re.sub("[\n\r]", "", oxi_data)
patt = re.compile("<tr>(.*?)</tr>", re.MULTILINE)

for m in patt.finditer(oxidata):
for m in patt.finditer(oxi_data):
line = m.group(1)
line = re.sub("</td>", "", line)
line = re.sub("(<td>)+", "<td>", line)
line = re.sub("</*a[^>]*>", "", line)
el = None
oxistates = []
oxi_states = []
common_oxi = []
for tok in re.split("<td>", line.strip()):
m2 = re.match(r"<b>([A-Z][a-z]*)</b>", tok)
Expand All @@ -53,15 +53,15 @@ def parse_oxi_state():
else:
m3 = re.match(r"(<b>)*([\+\-]\d)(</b>)*", tok)
if m3:
oxistates.append(int(m3.group(2)))
oxi_states.append(int(m3.group(2)))
if m3.group(1):
common_oxi.append(int(m3.group(2)))
if el in data:
del data[el]["Max oxidation state"]
del data[el]["Min oxidation state"]
del data[el]["Oxidation_states"]
del data[el]["Common_oxidation_states"]
data[el]["Oxidation states"] = oxistates
data[el]["Oxidation states"] = oxi_states
data[el]["Common oxidation states"] = common_oxi
else:
print(el)
Expand Down
57 changes: 31 additions & 26 deletions pymatgen/core/periodic_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,17 @@
from enum import Enum
from itertools import combinations, product
from pathlib import Path
from typing import Any, Callable, Literal
from typing import TYPE_CHECKING, Any, Callable, Literal

import numpy as np
from monty.json import MSONable

from pymatgen.core.units import SUPPORTED_UNIT_NAMES, FloatWithUnit, Length, Mass, Unit
from pymatgen.util.string import Stringify, formula_double_format

if TYPE_CHECKING:
from pymatgen.util.typing import SpeciesLike

# Loads element data from json file
with open(str(Path(__file__).absolute().parent / "periodic_table.json")) as f:
_pt_data = json.load(f)
Expand All @@ -30,7 +33,7 @@
class ElementBase(Enum):
"""Element class defined without any enum values so it can be subclassed."""

def __init__(self, symbol: str):
def __init__(self, symbol: SpeciesLike):
"""
Basic immutable element object with all relevant properties.
Expand Down Expand Up @@ -183,7 +186,7 @@ def __init__(self, symbol: str):
energy, etc. Note that this is zero-based indexing! So Element.ionization_energies[0] refer to the 1st
ionization energy. Values are from the NIST Atomic Spectra Database. Missing values are None.
"""
self.symbol = symbol
self.symbol = str(symbol)
d = _pt_data[symbol]

# Store key variables for quick access
Expand Down Expand Up @@ -1044,45 +1047,47 @@ class Species(MSONable, Stringify):

def __init__(
self,
symbol: str,
oxidation_state: float | None = 0.0,
symbol: SpeciesLike,
oxidation_state: float | None = None,
properties: dict | None = None,
):
) -> None:
"""
Initializes a Species.
Args:
symbol (str): Element symbol, e.g., Fe
oxidation_state (float): Oxidation state of element, e.g., 2 or -2
symbol (str): Element symbol optionally incl. oxidation state. E.g. Fe, Fe2+, O2-.
oxidation_state (float): Explicit oxidation state of element, e.g. -2, -1, 0, 1, 2, ...
If oxidation state is present in symbol, this argument is ignored.
properties: Properties associated with the Species, e.g.,
{"spin": 5}. Defaults to None. Properties must be one of the
Species supported_properties.
.. attribute:: oxi_state
Oxidation state associated with Species
.. attribute:: ionic_radius
Ionic radius of Species (with specific oxidation state).
.. versionchanged:: 2.6.7
Raises:
ValueError: If oxidation state passed both in symbol and via oxidation_state kwarg.
"""
if oxidation_state is not None and isinstance(symbol, str) and symbol[-1] in {"+", "-"}:
raise ValueError(
f"Oxidation state should be specified either in {symbol=} or as {oxidation_state=}, not both."
)
if isinstance(symbol, str) and symbol[-1] in {"+", "-"}:
# Extract oxidation state from symbol
symbol, oxi = re.match(r"([A-Za-z]+)([0-9]*[\+\-])", symbol).groups() # type: ignore[union-attr]
self._oxi_state: float | None = (1 if "+" in oxi else -1) * float(oxi[:-1] or 1)
else:
self._oxi_state = oxidation_state

Properties are now checked when comparing two Species for equality.
"""
self._el = Element(symbol)
self._oxi_state = oxidation_state
self._properties = properties or {}
for k, _ in self._properties.items():
if k not in Species.supported_properties:
raise ValueError(f"{k} is not a supported property")
for key in self._properties:
if key not in Species.supported_properties:
raise ValueError(f"{key} is not a supported property")

def __getattr__(self, a):
# overriding getattr doesn't play nice with pickle, so we
# can't use self._properties
p = object.__getattribute__(self, "_properties")
if a in p:
return p[a]
props = object.__getattribute__(self, "_properties")
if a in props:
return props[a]
return getattr(self._el, a)

def __eq__(self, other: object) -> bool:
Expand Down
24 changes: 22 additions & 2 deletions pymatgen/core/tests/test_periodic_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ def test_is(self):
assert Element("Bi").is_post_transition_metal, True


class SpecieTestCase(PymatgenTest):
class SpeciesTestCase(PymatgenTest):
def setUp(self):
self.specie1 = Species.from_string("Fe2+")
self.specie2 = Species("Fe", 3)
Expand Down Expand Up @@ -492,7 +492,27 @@ def test_stringify(self):
assert Species("S", -2).to_unicode_string() == "S²⁻"


class DummySpecieTestCase(unittest.TestCase):
@pytest.mark.parametrize(
    ("symbol_oxi", "expected_element", "expected_oxi_state"),
    [
        # Bare element symbol: no oxidation state is extracted.
        ("Fe", "Fe", None),
        # Digit followed by a sign: magnitude and sign are both taken from the string.
        ("Fe2+", "Fe", 2),
        ("O2-", "O", -2),
        # Sign without a digit: magnitude is expected to default to 1.
        ("N-", "N", -1),
        ("Ca+", "Ca", 1),
        ("Te3+", "Te", 3),
        ("P5+", "P", 5),
        # Explicit zero with either sign should compare equal to 0.
        ("Na0+", "Na", 0),
        ("Na0-", "Na", 0),
    ],
)
def test_symbol_oxi_state_str(symbol_oxi, expected_element, expected_oxi_state):
    """Check that Species splits a combined symbol string into element and oxidation state.

    Args:
        symbol_oxi: String handed to the Species constructor, e.g. "Fe2+".
        expected_element: Element symbol the resulting Species should wrap.
        expected_oxi_state: Oxidation state the resulting Species should store
            (None when the string carries no trailing sign).
    """
    parsed = Species(symbol_oxi)

    # Private attributes are inspected directly so the assertions reflect exactly
    # what the constructor stored.
    assert parsed._el.symbol == expected_element
    assert parsed._oxi_state == expected_oxi_state


class DummySpeciesTestCase(unittest.TestCase):
def test_init(self):
self.specie1 = DummySpecies("X")
with pytest.raises(ValueError):
Expand Down
38 changes: 19 additions & 19 deletions test_files/.pytest-split-durations
Original file line number Diff line number Diff line change
Expand Up @@ -871,11 +871,11 @@
"pymatgen/core/tests/test_operations.py::SymmOpTestCase::test_to_from_dict": 0.00025125101092271507,
"pymatgen/core/tests/test_operations.py::SymmOpTestCase::test_transform_tensor": 0.0006400830461643636,
"pymatgen/core/tests/test_operations.py::SymmOpTestCase::test_xyz": 0.0008586250187363476,
"pymatgen/core/tests/test_periodic_table.py::DummySpecieTestCase::test_eq": 0.0002254169958177954,
"pymatgen/core/tests/test_periodic_table.py::DummySpecieTestCase::test_from_string": 0.0002168750506825745,
"pymatgen/core/tests/test_periodic_table.py::DummySpecieTestCase::test_init": 0.000188207981409505,
"pymatgen/core/tests/test_periodic_table.py::DummySpecieTestCase::test_pickle": 0.00018166599329560995,
"pymatgen/core/tests/test_periodic_table.py::DummySpecieTestCase::test_sort": 0.0002004160196520388,
"pymatgen/core/tests/test_periodic_table.py::DummySpeciesTestCase::test_eq": 0.0002254169958177954,
"pymatgen/core/tests/test_periodic_table.py::DummySpeciesTestCase::test_from_string": 0.0002168750506825745,
"pymatgen/core/tests/test_periodic_table.py::DummySpeciesTestCase::test_init": 0.000188207981409505,
"pymatgen/core/tests/test_periodic_table.py::DummySpeciesTestCase::test_pickle": 0.00018166599329560995,
"pymatgen/core/tests/test_periodic_table.py::DummySpeciesTestCase::test_sort": 0.0002004160196520388,
"pymatgen/core/tests/test_periodic_table.py::ElementTestCase::test_attributes": 0.05512891701073386,
"pymatgen/core/tests/test_periodic_table.py::ElementTestCase::test_block": 0.000250834011239931,
"pymatgen/core/tests/test_periodic_table.py::ElementTestCase::test_data": 0.00028416700661182404,
Expand All @@ -900,20 +900,20 @@
"pymatgen/core/tests/test_periodic_table.py::ElementTestCase::test_term_symbols": 0.0011074999929405749,
"pymatgen/core/tests/test_periodic_table.py::ElementTestCase::test_valence": 0.00038108398439362645,
"pymatgen/core/tests/test_periodic_table.py::FuncTest::test_get_el_sp": 0.0002561250003054738,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_attr": 0.00020691697136498988,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_cmp": 0.00017037399811670184,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_deepcopy": 0.0001749170187395066,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_eq": 0.00017587500042282045,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_get_crystal_field_spin": 0.0003321249969303608,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_get_nmr_mom": 0.0003623329976107925,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_get_shannon_radius": 0.00020208401838317513,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_init": 0.0001648749748710543,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_ionic_radius": 0.0006132499838713557,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_no_oxidation_state": 0.0001666249881964177,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_pickle": 0.0012165000080130994,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_sort": 0.0002456679940223694,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_stringify": 0.00024587399093434215,
"pymatgen/core/tests/test_periodic_table.py::SpecieTestCase::test_to_from_string": 0.00031908301752991974,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_attr": 0.00020691697136498988,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_cmp": 0.00017037399811670184,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_deepcopy": 0.0001749170187395066,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_eq": 0.00017587500042282045,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_get_crystal_field_spin": 0.0003321249969303608,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_get_nmr_mom": 0.0003623329976107925,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_get_shannon_radius": 0.00020208401838317513,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_init": 0.0001648749748710543,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_ionic_radius": 0.0006132499838713557,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_no_oxidation_state": 0.0001666249881964177,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_pickle": 0.0012165000080130994,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_sort": 0.0002456679940223694,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_stringify": 0.00024587399093434215,
"pymatgen/core/tests/test_periodic_table.py::SpeciesTestCase::test_to_from_string": 0.00031908301752991974,
"pymatgen/core/tests/test_settings.py::test_load_settings": 0.003587376035284251,
"pymatgen/core/tests/test_sites.py::PeriodicSiteTest::test_as_from_dict": 0.00044604099821299314,
"pymatgen/core/tests/test_sites.py::PeriodicSiteTest::test_distance": 0.00035195998498238623,
Expand Down

0 comments on commit ad8e6e7

Please sign in to comment.