Skip to content

Commit

Permalink
Remove lxml dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
nigelpalmer committed Dec 13, 2021
1 parent de643e1 commit 039ef85
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 25 deletions.
75 changes: 52 additions & 23 deletions src/forcefield.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,11 @@
logger = getLogger(__name__)

try:
from lxml import etree
# from lxml import etree
from xml.etree import ElementTree as etree # lxml.etree module implements the extended ElementTree API for XML
except:
logger.warning("Failed to import lxml module, needed by OpenMM engine")
# logger.warning("Failed to import lxml module, needed by OpenMM engine") # RVK replaced lxml with xml.etree
logger.warning("Failed to import xml module, needed by OpenMM engine")

FF_Extensions = {"itp" : "gmx",
"top" : "gmx",
Expand Down Expand Up @@ -372,7 +374,7 @@ def addff(self,ffname,xmlScript=False):
Next, parse the file. Currently we support two classes of
files - text and XML. The two types are treated very
differently; for XML we use the parsers in libxml (via the
python lxml module), and for text files we have our own
python xml module), and for text files we have our own
in-house parsing class. Within text files, there is also a
specialized GROMACS and TINKER parser as well as a generic
text parser.
Expand All @@ -392,7 +394,7 @@ def addff(self,ffname,xmlScript=False):
--- If XML: ---
The force field file is read in using the lxml Python module. Specify
The force field file is read in using the xml Python module. Specify
which parameter you want to fit by adding a 'parameterize' element
to the end of the force field XML file, like so.
Expand Down Expand Up @@ -484,7 +486,8 @@ def addff(self,ffname,xmlScript=False):
try:
self.ffdata[ffname] = etree.parse(absff)
except NameError:
logger.error("If etree not defined, please check if lxml module has been installed")
# logger.error("If etree not defined, please check if lxml module has been installed") # RVK replaced lxml with xml.etree
logger.error("If etree not defined, please check if xml module has been installed")
raise
self.ffdata_isxml[ffname] = True
# Process the file
Expand Down Expand Up @@ -664,7 +667,7 @@ def addff_xml(self, ffname):
change a force field parameter. I can create a list of tree
elements (essentially pointers to elements within a tree), but
this method breaks down when I copy the tree because I have no
way to refer to the copied tree elements. Fortunately, lxml
way to refer to the copied tree elements. Fortunately, xml
gives me a way to represent a tree using a flat list, and my
XML file 'locations' are represented using the positions in
the list.
Expand Down Expand Up @@ -697,16 +700,28 @@ def addff_xml(self, ffname):
self.addff(ffnameScript, xmlScript=True)
os.unlink(absScript)

for e in self.ffdata[ffname].getroot().xpath('//@parameterize/..'):
parameters_to_optimize = [i.strip() for i in e.get('parameterize').split(',')]
# for e in self.ffdata[ffname].getroot().xpath('//@parameterize/..'): # RVK replaced to work with xml.etree
for e in self.ffdata[ffname].getroot().findall('.//*[@parameterize]'):# RVK WORKS
# print(f"ffname => {ffname}")
# print(f"xml output => {etree.tostring(e)}")
# parameters_to_optimize = [i.strip() for i in e.get('parameterize').split(',')] # RVK replaced with xml.etree
parameters_to_optimize = [i.strip() for i in e.attrib['parameterize'].split(',')]
# print(f"params to optimize=> {parameters_to_optimize}")
for p in parameters_to_optimize:
# print(f"p ==> {p}")
# print(f"e.attrib => {e.attrib}")
if p not in e.attrib:
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (p, e.get('type'), ffname) )
# print("RVK LOGGER ERROR")
# logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (p, e.get('type'), ffname) ) # RVK replaced to work with xml.etree
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (p, e.attrib['type'], ffname) )
raise RuntimeError
pid = self.Readers[ffname].build_pid(e, p)
element_tree = self.ffdata[ffname] # RVK added this
pid = self.Readers[ffname].build_pid(e, p, element_tree) # RVK edited corresponding function in smirnoffio.py
# pid = self.Readers[ffname].build_pid(e, p)
self.map[pid] = self.np
# offxml file later than v0.3 may have unit strings in the field
quantity_str = e.get(p)
# quantity_str = e.get(p) # RVK replaced with xml.etree
quantity_str = e.attrib[p]
res = re.search(r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?', quantity_str)
value_str, unit_str = quantity_str[:res.end()], quantity_str[res.end():]
self.assign_p0(self.np, float(value_str))
Expand All @@ -715,38 +730,52 @@ def addff_xml(self, ffname):
self.np += 1
self.patoms.append([])

for e in self.ffdata[ffname].getroot().xpath('//@parameter_repeat/..'):
for field in e.get('parameter_repeat').split(','):
# for e in self.ffdata[ffname].getroot().xpath('//@parameter_repeat/..'): # RVK replaced to work with xml.etree
for e in self.ffdata[ffname].getroot().findall('.//*[@parameter_repeat]'):
# for field in e.get('parameter_repeat').split(','): # RVK replaced with xml.etree
for field in e.attrib['parameter_repeat'].split(','):
parameter_name = field.strip().split('=', 1)[0]
if parameter_name not in e.attrib:
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.get('type'), ffname) )
# logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.get('type'), ffname) ) # RVK replaced with xml.etree
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.attrib['type'], ffname) )
raise RuntimeError
dest = self.Readers[ffname].build_pid(e, parameter_name)
element_tree = self.ffdata[ffname] # RVK added this
dest = self.Readers[ffname].build_pid(e, parameter_name, element_tree) # RVK edited corresponding function in smirnoffio.py
# dest = self.Readers[ffname].build_pid(e, parameter_name)
src = field.strip().split('=', 1)[1]
if src in self.map:
self.map[dest] = self.map[src]
else:
warn_press_key("Warning: You wanted to copy parameter from %s to %s, but the source parameter does not seem to exist!" % (src, dest))
self.assign_field(self.map[dest],dest,ffname,fflist.index(e),parameter_name,1)
quantity_str = e.get(parameter_name)
# quantity_str = e.get(parameter_name) # RVK replaced with xml.etree
quantity_str = e.attrib[parameter_name]
res = re.search(r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?', quantity_str)
value_str, unit_str = quantity_str[:res.end()], quantity_str[res.end():]
quantity_str = e.get(parameter_name)
# quantity_str = e.get(parameter_name) # RVK replaced to work with xml.etree
quantity_str = e.attrib[parameter_name]
self.offxml_unit_strs[dest] = unit_str

for e in self.ffdata[ffname].getroot().xpath('//@parameter_eval/..'):
for field in split(r',(?![^\[]*[\]])', e.get('parameter_eval')):
# for e in self.ffdata[ffname].getroot().xpath('//@parameter_eval/..'): # RVK replaced to work with xml.etree
for e in self.ffdata[ffname].getroot().findall('.//*[@parameter_eval]/..'):
# for field in split(r',(?![^\[]*[\]])', e.get('parameter_eval')): # RVK replaced to work with xml.etree
for field in split(r',(?![^\[]*[\]])', e.attrib['parameter_eval']):
parameter_name = field.strip().split('=', 1)[0]
if parameter_name not in e.attrib:
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.get('type'), ffname) )
# logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.get('type'), ffname) )# RVK replaced to work with xml.etree
logger.error("Parameter \'%s\' is not found for \'%s\', please check %s" % (parameter_name, e.attrib['type'], ffname) )
raise RuntimeError
dest = self.Readers[ffname].build_pid(e, parameter_name)
element_tree = self.ffdata[ffname] # RVK added this
dest = self.Readers[ffname].build_pid(e, parameter_name, element_tree) # RVK edited corresponding function in smirnoffio.py
# dest = self.Readers[ffname].build_pid(e, parameter_name)
evalcmd = field.strip().split('=', 1)[1]
self.assign_field(None,dest,ffname,fflist.index(e),parameter_name,None,evalcmd)
quantity_str = e.get(parameter_name)
# quantity_str = e.get(parameter_name) # RVK replaced to work with xml.etree
quantity_str = e.attrib[parameter_name]
res = re.search(r'^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?', quantity_str)
value_str, unit_str = quantity_str[:res.end()], quantity_str[res.end():]
quantity_str = e.get(parameter_name)
# quantity_str = e.get(parameter_name) # RVK replaced to work with xml.etree
quantity_str = e.attrib[parameter_name]
self.offxml_unit_strs[dest] = unit_str

def make(self,vals=None,use_pvals=False,printdir=None,precision=12):
Expand Down
42 changes: 40 additions & 2 deletions src/smirnoffio.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from forcebalance.opt_geo_target import OptGeoTarget
from forcebalance.torsion_profile import TorsionProfileTarget
import networkx as nx
from networkx.algorithms.dag import ancestors
import numpy as np
import sys
from forcebalance.finite_difference import *
Expand Down Expand Up @@ -124,10 +125,47 @@ def __init__(self,fnm):
## The parameter dictionary (defined in this file)
self.pdict = pdict

def build_pid(self, element, parameter):
# RVK added this function to recursively get the ancestors with python xml
def get_ancestors_recursively(self, element_tree, element, ancestors):
    """Collect the ancestors of ``element``, nearest parent first.

    xml.etree.ElementTree elements carry no parent pointer (unlike lxml's
    ``iterancestors``), so a child -> parent map is built from the tree and
    walked upwards starting at ``element``.

    The lookup is by element identity, not by tag name.  Searching with
    ``.//<tag>/..`` returns the parent of the *first* element in the
    document that has the same tag, which is the wrong ancestor chain when
    a tag occurs under several parents, and never terminates when a tag is
    nested inside an element of the same tag.

    Parameters
    ----------
    element_tree : ElementTree
        Tree that contains ``element``; only ``getroot()`` is used.
    element : Element
        Element whose ancestors are wanted.
    ancestors : list
        List that the ancestors are appended to, in place.

    Returns
    -------
    list
        The same ``ancestors`` list, extended with the parent, grandparent,
        ... up to and including the root.  It is returned unchanged when
        ``element`` is the root or is not part of ``element_tree``.
    """
    # Standard ElementTree idiom for recovering parents: one pass over the
    # tree, mapping every child to the element that contains it.
    parent_of = {
        child: parent
        for parent in element_tree.getroot().iter()
        for child in parent
    }
    current = element
    # The root has no entry in the map, which ends the walk.
    while current in parent_of:
        current = parent_of[current]
        ancestors.append(current)
    return ancestors

def build_pid(self, element, parameter, element_tree):
""" Build the parameter identifier (see _link_ for an example)
@todo Add a link here """
ParentType = ".".join([i.tag for i in list(element.iterancestors())][::-1][1:])
## print(f"lxml=>element.iterancestors()=>{element.iterancestors()}")#<lxml.etree.AncestorsIterator object at 0x0000013B5D737E00>
## print(f"lxml=>list(element.iterancestors())=>{list(element.iterancestors())}")#[<Element ProperTorsions at 0x13b5d730640>, <Element SMIRNOFF at 0x13b5d725e00>]
## print(f"element=>{element}")
## print(f"element_tree=>{element_tree}")
#parent = element_tree.getroot().find(".//{0}/..".format(element.tag)) # "parent=> <Element 'ProperTorsions' at 0x0000016665C14270>" ; findall gives array while find gives just 1. So using find.
## print(f"find element parent=>{parent}")
#parent1 = element_tree.getroot().find(".//{0}/..".format(parent.tag)) # parent1=> <Element 'SMIRNOFF' at 0x0000016665BFEA40>
# print(f"find parent[0].tag parent1=>{parent1}")
#ancestors = []
#ancestors.append(parent)
#ancestors.append(parent1)
#print(f"ancestors=>{ancestors}") #ancestors=>[<Element 'ProperTorsions' at 0x000001B58E42C2C0>, <Element 'SMIRNOFF' at 0x000001B58E41DA90>]
ancestors = []
ancestors = self.get_ancestors_recursively(element_tree, element, ancestors)
# print(f"harcoded get ancestors=>{ancestors}")

# ParentType = ".".join([i.tag for i in list(element.iterancestors())][::-1][1:]) # RVK replaced lxml with xml.etree.ElementTree
ParentType = ".".join([i.tag for i in ancestors][::-1][1:])
# print(f"RVK ParentType=>{ParentType}")
InteractionType = element.tag
try:
Involved = element.attrib["smirks"]
Expand Down

0 comments on commit 039ef85

Please sign in to comment.