Skip to content

Commit

Permalink
UpfData: automatically parse the Z valence from the file (#24)
Browse files Browse the repository at this point in the history
This value is required, among other things, to determine the starting
magnetization in spin-polarized calculations. Instead of having to parse
the file each time this value is needed, it is parsed once upon construction
of the node and stored as an attribute.
  • Loading branch information
sphuber authored Nov 17, 2020
1 parent 0deb470 commit 33e6ac9
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 11 deletions.
74 changes: 63 additions & 11 deletions aiida_pseudo/data/pseudo/upf.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Module for data plugin to represent a pseudo potential in UPF format."""
import re
from typing import BinaryIO
from typing import BinaryIO, Union

from .pseudo import PseudoPotentialData

Expand All @@ -10,30 +10,58 @@
# Matches a one- or two-letter symbol followed by whitespace and the literal word ``Element``.
# NOTE(review): presumably the header layout of version 1 UPF files -- confirm against the format specification.
REGEX_ELEMENT_V1 = re.compile(r"""(?P<element>[a-zA-Z]{1,2})\s+Element""")
# Matches an ``element="<symbol>"`` attribute (single or double quotes, optional surrounding whitespace).
# NOTE(review): presumably the attribute layout of version 2 UPF files -- confirm against the format specification.
REGEX_ELEMENT_V2 = re.compile(r"""\s*element\s*=\s*['"]\s*(?P<element>[a-zA-Z]{1,2})\s*['"].*""")

# Optionally signed decimal number with an optional fractional part and exponent, e.g. ``1``, ``1.``, ``.5``, ``1.00E+01``.
PATTERN_FLOAT = r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?'
# Matches a number followed by whitespace and the literal text ``Z valence``; the number is captured as ``z_valence``.
REGEX_Z_VALENCE_V1 = re.compile(r"""(?P<z_valence>""" + PATTERN_FLOAT + r""")\s+Z valence""")
# Matches a ``z_valence="<number>"`` attribute (single or double quotes, whitespace allowed inside the quotes).
REGEX_Z_VALENCE_V2 = re.compile(r"""\s*z_valence\s*=\s*['"]\s*(?P<z_valence>""" + PATTERN_FLOAT + r""")\s*['"].*""")

def parse_element(stream: BinaryIO):

def parse_element(content: str):
"""Parse the content of the UPF file to determine the element.
:param content: the text content of the UPF file as a string.
:return: the element symbol exactly as matched in the content (one or two letters).
:raises ValueError: if neither element pattern matches the content.
"""
lines = stream.read().decode('utf-8')
match = REGEX_ELEMENT_V2.search(lines)
# Try the ``element="..."`` attribute pattern first, then fall back to the ``<symbol> Element`` pattern.
for regex in [REGEX_ELEMENT_V2, REGEX_ELEMENT_V1]:

match = regex.search(content)

if match:
return match.group('element')

# NOTE(review): the message embeds the complete content that was passed in.
raise ValueError(f'could not parse the element from the UPF content: {content}')


def parse_z_valence(content: str) -> int:
"""Parse the content of the UPF file to determine the Z valence.
:param content: the text content of the UPF file as a string.
:return: the Z valence as an integer.
:raises ValueError: if no Z valence pattern matches the content, or if the matched number is not a whole number.
"""
# Try the ``z_valence="..."`` attribute pattern first, then fall back to the ``<number> Z valence`` pattern.
for regex in [REGEX_Z_VALENCE_V2, REGEX_Z_VALENCE_V1]:

match = regex.search(content)

if match:
z_valence = match.group('z_valence')

if match:
return match.group('element')
try:
z_valence = float(z_valence)
except ValueError as exception:
raise ValueError(f'parsed value for the Z valence `{z_valence}` is not a valid number.') from exception

match = REGEX_ELEMENT_V1.search(lines)
# Values such as ``1.0`` or ``1.00E+01`` are accepted; anything with a non-zero fractional part is rejected.
if int(z_valence) != z_valence:
raise ValueError(f'parsed value for the Z valence `{z_valence}` is not an integer.')

if match:
return match.group('element')
return int(z_valence)

raise ValueError('could not parse the element from the UPF content.')
# NOTE(review): the message embeds the complete content that was passed in.
raise ValueError(f'could not parse the Z valence from the UPF content: {content}')


class UpfData(PseudoPotentialData):
"""Data plugin to represent a pseudo potential in UPF format."""

_key_z_valence = 'z_valence'

def set_file(self, stream: BinaryIO, filename: str = None, **kwargs): # pylint: disable=arguments-differ
"""Set the file content.
Expand All @@ -42,6 +70,30 @@ def set_file(self, stream: BinaryIO, filename: str = None, **kwargs): # pylint:
:raises ValueError: if the element symbol is invalid.
"""
stream.seek(0)
self.element = parse_element(stream)

content = stream.read().decode('utf-8')
self.element = parse_element(content)
self.z_valence = parse_z_valence(content)

stream.seek(0)
super().set_file(stream, filename, **kwargs)

@property
def z_valence(self) -> Union[int, None]:
    """Return the Z valence stored on this node.

    :return: whatever ``get_attribute`` yields for the ``z_valence`` key, with ``None`` passed as the default.
    """
    stored_value = self.get_attribute(self._key_z_valence, None)
    return stored_value

@z_valence.setter
def z_valence(self, value: int):
    """Validate the Z valence and store it as a node attribute.

    :param value: the Z valence; must be an ``int`` that is not negative.
    :raises ValueError: if ``value`` is not an ``int`` or is smaller than zero.
    """
    # The type check comes first so that non-integers are never compared against zero.
    is_valid = isinstance(value, int) and value >= 0

    if is_valid:
        self.set_attribute(self._key_z_valence, value)
        return

    raise ValueError(f'`{value}` is not a positive integer')
19 changes: 19 additions & 0 deletions tests/data/pseudo/test_upf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from aiida.common.exceptions import ModificationNotAllowed
from aiida_pseudo.data.pseudo import UpfData
from aiida_pseudo.data.pseudo.upf import parse_z_valence


@pytest.mark.usefixtures('clear_db')
Expand Down Expand Up @@ -38,3 +39,21 @@ def test_set_file(filepath_pseudos, get_pseudo_potential_data):

with pytest.raises(ModificationNotAllowed):
pseudo.set_file(handle)


@pytest.mark.parametrize(
    'content, expected', (
        ('z_valence="1"', 1),
        ('z_valence="1.0"', 1),
        ('z_valence="1.000"', 1),
        ('z_valence="1.00E+01"', 10),
        ('z_valence="1500."', 1500),
        ("z_valence='1.0'", 1),
        ('z_valence=" 1"', 1),
        ('z_valence="1 "', 1),
        ('1.0 Z valence', 1),
    )
)
def test_parse_z_valence(content, expected):
    """Test the ``parse_z_valence`` method.

    Each case pairs a snippet in one of the two supported layouts with the integer that should be parsed from it.
    Comparing against the exact value (and type) catches a parser that returns a wrong but truthy result, which a
    bare truthiness assertion would let through.
    """
    z_valence = parse_z_valence(content)

    assert isinstance(z_valence, int)
    assert z_valence == expected
1 change: 1 addition & 0 deletions tests/fixtures/pseudos/upf/Ar.upf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="sphuber"
date="200411"
element="Ar"
z_valence="1.0"
pseudo_type="NC"
/>
</UPF>
1 change: 1 addition & 0 deletions tests/fixtures/pseudos/upf/He.upf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="sphuber"
date="200411"
element="He"
z_valence="1.0"
pseudo_type="NC"
/>
</UPF>
1 change: 1 addition & 0 deletions tests/fixtures/pseudos/upf/Kr.upf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="sphuber"
date="200411"
element="Kr"
z_valence="1.0"
pseudo_type="NC"
/>
</UPF>
1 change: 1 addition & 0 deletions tests/fixtures/pseudos/upf/Ne.upf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="sphuber"
date="200411"
element="Ne"
z_valence="1.0"
pseudo_type="NC"
/>
</UPF>
1 change: 1 addition & 0 deletions tests/fixtures/pseudos/upf/Rn.upf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
author="sphuber"
date="200411"
element="Rn"
z_valence="1.0"
pseudo_type="NC"
/>
</UPF>

0 comments on commit 33e6ac9

Please sign in to comment.