diff --git a/armi/bookkeeping/__init__.py b/armi/bookkeeping/__init__.py index 89a17adcb..6e34f11f1 100644 --- a/armi/bookkeeping/__init__.py +++ b/armi/bookkeeping/__init__.py @@ -22,7 +22,7 @@ class BookkeepingPlugin(plugins.ArmiPlugin): @staticmethod @plugins.HOOKIMPL def exposeInterfaces(cs): - from armi.bookkeeping.db import database3 + from armi.bookkeeping.db import databaseInterface from armi.bookkeeping import historyTracker from armi.bookkeeping import memoryProfiler from armi.bookkeeping import mainInterface @@ -31,7 +31,7 @@ def exposeInterfaces(cs): interfaceInfo = [] interfaceInfo += plugins.collectInterfaceDescriptions(mainInterface, cs) - interfaceInfo += plugins.collectInterfaceDescriptions(database3, cs) + interfaceInfo += plugins.collectInterfaceDescriptions(databaseInterface, cs) interfaceInfo += plugins.collectInterfaceDescriptions(historyTracker, cs) interfaceInfo += plugins.collectInterfaceDescriptions(memoryProfiler, cs) interfaceInfo += plugins.collectInterfaceDescriptions(reportInterface, cs) diff --git a/armi/bookkeeping/db/__init__.py b/armi/bookkeeping/db/__init__.py index 60f3979be..96e627598 100644 --- a/armi/bookkeeping/db/__init__.py +++ b/armi/bookkeeping/db/__init__.py @@ -18,39 +18,47 @@ As an ARMI run progresses, this is periodically updated as the primary output file. It can also be an input file for follow-on analysis or restart runs. -The database can be visualized through various tools such as XTVIEW. - This module contains factories for selecting and building DB-related objects. -Some notes on versions ----------------------- -Persistent storage of ARMI models has seen many changes throughout the years. -Calculation results were originally stored on a SQL database (version 1), which has been -fully deprecated at this point. - -Version 2 was the first to use HDF5 as the primary storage format. This was beneficial, -as it did not rely on any external infrastructure to operate, and benefited from the -suite of tools that help interact with HDF5 files. It was eventually replaced because -it did not store a complete model of the reactor, but rather a ghost of assembly, block, -and reactor parameters that could be applied to an existing reactor model (so long as -the dimensions were consistent!). This led to loading reactors being inconvenient and -error-prone, and also posed a limitation for representing more complex systems that have -non-core components. - -Version 3 was created to make the schema more flexible and to permit storing the entire -reactor model within the HDF5 file. All objects in the ARMI Composite Model are written -to the database, and the model can be recovered in its entirety just from the HDF5 file. -Since it's inception, it has seen a number of tweaks to improve its functionality and -fix bugs. - -Being a serialization format, the code associated with reading and writing database -files may not benefit from Don't Repeat Yourself (DRY) practices in the same way as -other code. Therefore, we do not share much, if any, code between different major -versions of the databases. As such, new major-versioned database implementations should -exist in their own modules. Minor revisions (e.g. M.(N+1)) to the database structure -should be simple enough that specialized logic can be used to support all minor versions -without posing a maintenance burden. A detailed change log should be maintained of each -minor revision. 
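Because versions 2 and later store everything in plain HDF5, an output file can be inspected directly with ``h5py``. A rough sketch, assuming an existing output file (the ``mycase.h5`` name is illustrative)::

    import h5py

    with h5py.File("mycase.h5", "r") as f:
        # Time-node state points are stored as groups named c<cycle>n<node>,
        # e.g. "c00n00"; see getH5GroupName() in database3.py for the convention.
        statePoints = sorted(name for name in f if name.startswith("c"))
        print(statePoints)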
+When updating a db version +-------------------------- +The code associated with reading and writing database files may not benefit from Don't +Repeat Yourself (DRY) practices in the same way as other code. Therefore, do not share +code between different major versions of the databases. Create a new module if you are +creating a new major database version. + + +Database revision changelog +--------------------------- + - 1: Originally, calculation results were stored in a SQL database. + + - 2: The storage format was changed to HDF5. This required less external + infrastructure than SQL. However, the implementation did not store a complete + model of a reactor, but a ghost of assembly, block, and reactor parameters that + could be applied to an existing reactor model (so long as the dimensions were + consistent). This was inconvenient and error prone. + + - 3: The HDF5 format was kept, but the schema was made more flexible to permit + storing the entire reactor model. All objects in the ARMI Composite Model are + written to the database, and the model can be completely recovered from just the + HDF5 file. + + - 3.1: Improved the handling of reading/writing grids. + + - 3.2: Changed the strategy for storing large attributes to using a special + string starting with an "@" symbol (e.g., "@/c00n00/attrs/5_linkedDims"). This + was done to support copying time node datasets from one file to another without + invalidating the references. Support was maintained for reading previous + versions, by performing a ``mergeHistory()`` and converting to the new naming + strategy, but the old version cannot be written. + + - 3.3: Compressed the way locations are stored in the database and allow + MultiIndex locations to be read and written. + + - 3.4: Modified the way locations are stored in the database to include complete + indices for indices that can be composed from multiple grids. Having complete + indices allows for more efficient means of extracting information based on + location, without having to compose the full model. """ import os from typing import Optional, List, Tuple @@ -62,7 +70,8 @@ # re-export package components for easier import from .permissions import Permissions -from .database3 import Database3, DatabaseInterface, updateGlobalAssemblyNum +from .database3 import Database3, updateGlobalAssemblyNum +from .databaseInterface import DatabaseInterface from .compareDB3 import compareDatabases from .factory import databaseFactory diff --git a/armi/bookkeeping/db/database3.py b/armi/bookkeeping/db/database3.py index 3ce6183be..a86715a03 100644 --- a/armi/bookkeeping/db/database3.py +++ b/armi/bookkeeping/db/database3.py @@ -13,48 +13,23 @@ # limitations under the License. """ -ARMI Database implementation, version 3. - -This Implementation of the database is a significant departure from the previous. One of -the foundational concepts in this version is that a reactor model should be fully -recoverable from the database itself; all the way down to the component level. As a -result, the structure of the underlying data is bound to the hierarchical Composite -Reactor Model, rather than an ad hoc collection of Block parameter fields and other -parameters. Furthermore, this format is intended to be more dynamic, permitting as-yet -undeveloped levels and classes in the Composite Reactor Model to be supported as they -are added. More high-level discussion is contained in :doc:`/user/outputs/database`. 
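The "@" linked-attribute convention described in the 3.2 entry above can be followed with plain ``h5py``. A hedged sketch; the file, dataset, and attribute names here are illustrative only::

    import re
    import h5py

    ATTR_LINK = re.compile(r"^@(.*)$")

    with h5py.File("mycase.h5", "r") as f:
        dataset = f["c00n00/Circle/id"]
        val = dataset.attrs.get("linkedDims", "")
        m = ATTR_LINK.match(val) if isinstance(val, str) else None
        if m:
            # e.g. "@/c00n00/attrs/5_linkedDims" names a real dataset that holds
            # the attribute data that was too large for the HDF5 object header.
            linkedDims = f[m.group(1)][()]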
- -The most important contents of this module are the :py:class:`DatabaseInterface`, the -:py:class:`Database3` class, the :py:class:`Layout` class, and the special data -packing/unpacking functions. The ``Database3`` class contains most of the functionality -for interacting with the underlying data. This includes things like dumping a Reactor -state to the database and loading it back again, as well as extracting historical data -for a given object or collection of object from the database file. When interacting with -the database file, the ``Layout`` class is used to help map the hierarchical Composite -Reactor Model to the flat representation in the database. - -Refer to :py:mod:`armi.bookkeeping.db` for notes about versioning. - -Minor revision changelog ------------------------- - - 3.1: Improve the handling of reading/writing grids. - - - 3.2: Change the strategy for storing large attributes from using an Object Reference - to an external dataset to using a special string starting with an "@" symbol (e.g., - "@/c00n00/attrs/5_linkedDims"). This was done to support copying time node datasets - from one file to another without invalidating the references. Support is maintained - for reading previous versions, and for performing a ``mergeHistory()`` and converting - to the new reference strategy, but the old version cannot be written. - - - 3.3: Compress the way locations are stored in the database and allow MultiIndex - locations to be read and written. - - - 3.4: Modified the way that locations are stored in the database to include complete - indices for indices that can be composed from multiple grids. This was done since the - space is already being used to be able to store them, and because having complete - indices allows for more efficient means of extracting information based on location - without having to compose the full model. - +ARMI Database implementation, version 3.4. + +A reactor model should be fully recoverable from the database; all the way down to the +component level. As a result, the structure of the underlying data is bound to the +hierarchical Composite Reactor Model. Furthermore, this database format is intended to +be more dynamic, permitting as-yet undeveloped levels and classes in the Composite +Reactor Model to be supported as they are added. More high-level discussion is +contained in :doc:`/user/outputs/database`. + +The :py:class:`Database3` class contains most of the functionality for interacting +with the underlying data. This includes things like dumping a Reactor state to the +database and loading it back again, as well as extracting historical data for a given +object or collection of object from the database file. However, for the nitty-gritty +details of how the hierarchical Composite Reactor Model is translated to the flat file +database, please refer to :py:mod:`armi.bookkeeping.db.layout`. + +Refer to :py:mod:`armi.bookkeeping.db` for information about versioning. 
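A minimal usage sketch (the file name is illustrative and assumes the requested state point exists in the file)::

    from armi.bookkeeping.db import Database3

    with Database3("mycase.h5", "r") as db:
        if db.hasTimeStep(0, 1):
            # rebuild the full reactor model from cycle 0, time node 1
            r = db.load(0, 1, allowMissing=True)
            print(r.core.p.maxAssemNum)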
""" import collections import copy @@ -66,7 +41,6 @@ import shutil import subprocess import sys -import time from platform import uname from typing import ( Optional, @@ -76,7 +50,6 @@ Any, List, Sequence, - MutableSequence, Generator, ) @@ -85,15 +58,19 @@ from armi import context from armi import getApp -from armi import interfaces from armi import meta from armi import runLog from armi import settings +from armi.bookkeeping.db.layout import ( + Layout, + DB_VERSION, + replaceNonesWithNonsense, + replaceNonsenseWithNones, +) from armi.reactor import parameters from armi.reactor.parameters import parameterCollections -from armi.reactor.parameters import parameterDefinitions from armi.reactor.flags import Flags -from armi.reactor.reactors import Reactor, Core +from armi.reactor.reactors import Core from armi.reactor import assemblies from armi.reactor.assemblies import Assembly from armi.reactor.blocks import Block @@ -102,47 +79,30 @@ from armi.reactor import grids from armi.bookkeeping.db.typedefs import History, Histories from armi.reactor import systemLayoutInput -from armi.utils import getPreviousTimeNode, getStepLengths from armi.utils.textProcessors import resolveMarkupInclusions from armi.nucDirectory import nuclideBases -from armi.settings.fwSettings.databaseSettings import ( - CONF_SYNC_AFTER_WRITE, - CONF_FORCE_DB_PARAMS, -) - -ORDER = interfaces.STACK_ORDER.BOOKKEEPING -DB_MAJOR = 3 -DB_MINOR = 4 -DB_VERSION = f"{DB_MAJOR}.{DB_MINOR}" - -ATTR_LINK = re.compile("^@(.*)$") +# CONSTANTS _SERIALIZER_NAME = "serializerName" _SERIALIZER_VERSION = "serializerVersion" -LOC_NONE = "N" -LOC_COORD = "C" -LOC_INDEX = "I" -LOC_MULTI = "M:" - -LOCATION_TYPE_LABELS = { - type(None): LOC_NONE, - grids.CoordinateLocation: LOC_COORD, - grids.IndexLocation: LOC_INDEX, - grids.MultiIndexLocation: LOC_MULTI, -} +def getH5GroupName(cycle: int, timeNode: int, statePointName: str = None) -> str: + """ + Naming convention specifier. -def getH5GroupName(cycle, timeNode, statePointName=None): + ARMI defines the naming convention cXXnYY for groups of simulation data. + That is, data is grouped by cycle and time node information during a + simulated run. + """ return "c{:0>2}n{:0>2}{}".format(cycle, timeNode, statePointName or "") -def describeInterfaces(cs): - """Function for exposing interface(s) to other code""" - return (DatabaseInterface, {"enabled": cs["db"]}) - - -def updateGlobalAssemblyNum(r): +def updateGlobalAssemblyNum(r) -> None: + """ + Updated the global assembly number counter in ARMI, using the assemblies + read from a database. + """ assemNum = r.core.p.maxAssemNum if assemNum is not None: assemblies.setAssemNumCounter(int(assemNum + 1)) @@ -150,378 +110,6 @@ def updateGlobalAssemblyNum(r): raise ValueError("Could not load maxAssemNum from the database") -class DatabaseInterface(interfaces.Interface): - """ - Handles interactions between the ARMI data model and the persistent data storage - system. - - This reads/writes the ARMI state to/from the database and helps derive state - information that can be derived. 
- """ - - name = "database" - - def __init__(self, r, cs): - interfaces.Interface.__init__(self, r, cs) - self._db = None - self._dbPath: Optional[pathlib.Path] = None - - if cs[CONF_FORCE_DB_PARAMS]: - toSet = {paramName: set() for paramName in cs[CONF_FORCE_DB_PARAMS]} - for (name, _), pDef in parameterDefinitions.ALL_DEFINITIONS.items(): - if name in toSet.keys(): - toSet[name].add(pDef) - - for name, pDefs in toSet.items(): - runLog.info( - "Forcing parameter {} to be written to the database, per user " - "input".format(name) - ) - for pDef in pDefs: - pDef.saveToDB = True - - def __repr__(self): - return "<{} '{}' {} >".format( - self.__class__.__name__, self.name, repr(self._db) - ) - - @property - def database(self): - """ - Presents the internal database object, if it exists. - """ - if self._db is not None: - return self._db - else: - raise RuntimeError( - "The Database interface has not yet created a database " - "object. InteractBOL or loadState must be called first." - ) - - def interactBOL(self): - """Initialize the database if the main interface was not available. (Begining of Life)""" - if not self._db: - self.initDB() - - def initDB(self, fName: Optional[os.PathLike] = None): - """ - Open the underlying database to be written to, and write input files to DB. - - Notes - ----- - Main Interface calls this so that the database is available as early as - possible in the run. The database interface interacts near the end of the - interface stack (so that all the parameters have been updated) while the Main - Interface interacts first. - """ - if fName is None: - self._dbPath = pathlib.Path(self.cs.caseTitle + ".h5") - else: - self._dbPath = pathlib.Path(fName) - - if self.cs["reloadDBName"].lower() == str(self._dbPath).lower(): - raise ValueError( - "It appears that reloadDBName is the same as the case " - "title. This could lead to data loss! Rename the reload DB or the " - "case." - ) - self._db = Database3(self._dbPath, "w") - self._db.open() - - # Grab geomString here because the DB-level has no access to the reactor or - # blueprints or anything. - # There's not always a geomFile; we are moving towards the core grid definition - # living in the blueprints themselves. In this case, the db doesnt need to store - # a geomFile at all. - if self.cs["geomFile"]: - with open(os.path.join(self.cs.inputDirectory, self.cs["geomFile"])) as f: - geomString = f.read() - else: - geomString = "" - self._db.writeInputsToDB(self.cs, geomString=geomString) - - def interactEveryNode(self, cycle, node): - """ - Write to database. - - DBs should receive the state information of the run at each node. - """ - # skip writing for last burn step since it will be written at interact EOC - if node < self.o.burnSteps[cycle]: - self.r.core.p.minutesSinceStart = ( - time.time() - self.r.core.timeOfStart - ) / 60.0 - self._db.writeToDB(self.r) - if self.cs[CONF_SYNC_AFTER_WRITE]: - self._db.syncToSharedFolder() - - def interactEOC(self, cycle=None): - """In case anything changed since last cycle (e.g. rxSwing), update DB. (End of Cycle)""" - # We cannot presume whether we are at EOL based on cycle and cs["nCycles"], - # since cs["nCycles"] is not a difinitive indicator of EOL; ultimately the - # Operator has the final say. - if not self.o.atEOL: - self.r.core.p.minutesSinceStart = ( - time.time() - self.r.core.timeOfStart - ) / 60.0 - self._db.writeToDB(self.r) - - def interactEOL(self): - """DB's should be closed at run's end. 
(End of Life)""" - # minutesSinceStarts should include as much of the ARMI run as possible so EOL - # is necessary, too. - self.r.core.p.minutesSinceStart = (time.time() - self.r.core.timeOfStart) / 60.0 - self._db.writeToDB(self.r) - self._db.close(True) - - def interactError(self): - r"""Get shutdown state information even if the run encounters an error""" - try: - self.r.core.p.minutesSinceStart = ( - time.time() - self.r.core.timeOfStart - ) / 60.0 - - # this can result in a double-error if the error occurred in the database - # writing - self._db.writeToDB(self.r, "error") - self._db.close(False) - except: # pylint: disable=bare-except; we're already responding to an error - pass - - def interactDistributeState(self) -> None: - """ - Reconnect to pre-existing database. - - DB is created and managed by the primary node only but we can still connect to it - from workers to enable things like history tracking. - """ - if context.MPI_RANK > 0: - # DB may not exist if distribute state is called early. - if self._dbPath is not None and os.path.exists(self._dbPath): - self._db = Database3(self._dbPath, "r") - self._db.open() - - def distributable(self): - return self.Distribute.SKIP - - def prepRestartRun(self): - """ - Load the data history from the database requested in the case setting - `reloadDBName`. - - Reactor state is put at the cycle/node requested in the case settings - `startCycle` and `startNode`, having loaded the state from all cycles prior - to that in the requested database. - - Notes - ----- - Mixing the use of simple vs detailed cycles settings is allowed, provided - that the cycle histories prior to `startCycle`/`startNode` are equivalent. - """ - reloadDBName = self.cs["reloadDBName"] - runLog.info( - f"Merging database history from {reloadDBName} for restart analysis." - ) - startCycle = self.cs["startCycle"] - startNode = self.cs["startNode"] - - with Database3(reloadDBName, "r") as inputDB: - loadDbCs = inputDB.loadCS() - - # pull the history up to the cycle/node prior to `startCycle`/`startNode` - dbCycle, dbNode = getPreviousTimeNode( - startCycle, - startNode, - self.cs, - ) - - # check that cycle histories are equivalent up to this point - self._checkThatCyclesHistoriesAreEquivalentUpToRestartTime( - loadDbCs, dbCycle, dbNode - ) - - self._db.mergeHistory(inputDB, startCycle, startNode) - self.loadState(dbCycle, dbNode) - - def _checkThatCyclesHistoriesAreEquivalentUpToRestartTime( - self, loadDbCs, dbCycle, dbNode - ): - dbStepLengths = getStepLengths(loadDbCs) - currentCaseStepLengths = getStepLengths(self.cs) - dbStepHistory = [] - currentCaseStepHistory = [] - try: - for cycleIdx in range(dbCycle + 1): - if cycleIdx == dbCycle: - # truncate it at dbNode - dbStepHistory.append(dbStepLengths[cycleIdx][:dbNode]) - currentCaseStepHistory.append( - currentCaseStepLengths[cycleIdx][:dbNode] - ) - else: - dbStepHistory.append(dbStepLengths[cycleIdx]) - currentCaseStepHistory.append(currentCaseStepLengths[cycleIdx]) - except IndexError: - runLog.error( - f"DB cannot be loaded to this time: cycle={dbCycle}, node={dbNode}" - ) - raise - - if dbStepHistory != currentCaseStepHistory: - raise ValueError( - "The cycle history up to the restart cycle/node must be equivalent." - ) - - # TODO: The use of "yield" here is suspect. - def _getLoadDB(self, fileName): - """ - Return the database to load from in order of preference. - - Notes - ----- - If filename is present only returns one database since specifically instructed - to load from that database. 
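The cycle-history check described above can be pictured with plain step-length lists; a hedged illustration with made-up values::

    # Step lengths (days) per cycle, from the reload DB and the current case.
    dbStepLengths = [[100.0, 100.0], [100.0, 100.0]]
    currentCaseStepLengths = [[100.0, 100.0], [50.0, 150.0]]

    # Restarting such that dbCycle=1, dbNode=1 compares the truncated histories
    # [[100.0, 100.0], [100.0]] and [[100.0, 100.0], [50.0]]; they differ, so
    # _checkThatCyclesHistoriesAreEquivalentUpToRestartTime would raise ValueError.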
- """ - if fileName is not None: - # only yield 1 database if the file name is specified - if self._db is not None and fileName == self._db._fileName: - yield self._db - elif os.path.exists(fileName): - yield Database3(fileName, "r") - else: - if self._db is not None: - yield self._db - if os.path.exists(self.cs["reloadDBName"]): - yield Database3(self.cs["reloadDBName"], "r") - - def loadState( - self, cycle, timeNode, timeStepName="", fileName=None, updateGlobalAssemNum=True - ): - """ - Loads a fresh reactor and applies it to the Operator. - - Notes - ----- - Will load preferentially from the `fileName` if passed. Otherwise will load from - existing database in memory or `cs["reloadDBName"]` in that order. - - Raises - ------ - RuntimeError - If fileName is specified and that file does not have the time step. - If fileName is not specified and neither the database in memory, nor the - `cs["reloadDBName"]` have the time step specified. - """ - for potentialDatabase in self._getLoadDB(fileName): - with potentialDatabase as loadDB: - if loadDB.hasTimeStep(cycle, timeNode, statePointName=timeStepName): - newR = loadDB.load( - cycle, - timeNode, - statePointName=timeStepName, - cs=self.cs, - allowMissing=True, - updateGlobalAssemNum=updateGlobalAssemNum, - ) - self.o.reattach(newR, self.cs) - break - else: - # reactor was never set so fail - if fileName: - raise RuntimeError( - "Cannot load state from specified file {} @ {}".format( - fileName, getH5GroupName(cycle, timeNode, timeStepName) - ) - ) - raise RuntimeError( - "Cannot load state from @ {}".format( - getH5GroupName(cycle, timeNode, timeStepName) - ) - ) - - def getHistory( - self, - comp: ArmiObject, - params: Optional[Sequence[str]] = None, - timeSteps: Optional[MutableSequence[Tuple[int, int]]] = None, - byLocation: bool = False, - ) -> History: - """ - Get historical parameter values for a single object. - - This is mostly a wrapper around the same function on the ``Database3`` class, - but knows how to return the current value as well. - - See Also - -------- - Database3.getHistory - """ - # make a copy so that we can potentially remove timesteps without affecting the - # caller - timeSteps = copy.copy(timeSteps) - now = (self.r.p.cycle, self.r.p.timeNode) - nowRequested = timeSteps is None - if timeSteps is not None and now in timeSteps: - nowRequested = True - timeSteps.remove(now) - - if byLocation: - history = self.database.getHistoryByLocation(comp, params, timeSteps) - else: - history = self.database.getHistory(comp, params, timeSteps) - - if nowRequested: - for param in params or history.keys(): - if param == "location": - history[param][now] = tuple(comp.spatialLocator.indices) - else: - history[param][now] = comp.p[param] - - return history - - def getHistories( - self, - comps: Sequence[ArmiObject], - params: Optional[Sequence[str]] = None, - timeSteps: Optional[MutableSequence[Tuple[int, int]]] = None, - byLocation: bool = False, - ) -> Histories: - """ - Get historical parameter values for one or more objects. - - This is mostly a wrapper around the same function on the ``Database3`` class, - but knows how to return the current value as well. 
- - See Also - -------- - Database3.getHistories - """ - now = (self.r.p.cycle, self.r.p.timeNode) - nowRequested = timeSteps is None - if timeSteps is not None: - # make a copy so that we can potentially remove timesteps without affecting - # the caller - timeSteps = copy.copy(timeSteps) - if timeSteps is not None and now in timeSteps: - nowRequested = True - timeSteps.remove(now) - - if byLocation: - histories = self.database.getHistoriesByLocation(comps, params, timeSteps) - else: - histories = self.database.getHistories(comps, params, timeSteps) - - if nowRequested: - for c in comps: - for param in params or histories[c].keys(): - if param == "location": - histories[c][param][now] = c.spatialLocator.indices - else: - histories[c][param][now] = c.p[param] - - return histories - - class Database3: """ Version 3 of the ARMI Database, handling serialization and loading of Reactor states. @@ -1333,7 +921,7 @@ def _writeParams(self, h5group, comps): dataset = g.create_dataset(paramDef.name, data=data, compression="gzip") if any(attrs): - _writeAttrs(dataset, h5group, attrs) + Database3._writeAttrs(dataset, h5group, attrs) except Exception: runLog.error( "Failed to write {} to database. Data: " @@ -1393,7 +981,7 @@ def _readParams(h5group, compTypeName, comps, allowMissing=False): raise data = dataSet[:] - attrs = _resolveAttrs(dataSet.attrs, h5group) + attrs = Database3._resolveAttrs(dataSet.attrs, h5group) if pDef.serializer is not None: assert _SERIALIZER_NAME in dataSet.attrs @@ -1787,664 +1375,79 @@ def getHistories( return histData - -def _packLocations( - locations: List[grids.LocationBase], minorVersion: int = DB_MINOR -) -> Tuple[List[str], List[Tuple[int, int, int]]]: - """ - Extract information from a location needed to write it to this DB. - - Each locator has one locationType and up to N location-defining datums, - where N is the number of entries in a possible multiindex, or just 1 - for everything else. - - Shrink grid locator names for storage efficiency. - - Notes - ----- - Contains some conditionals to still load databases made before - db version 3.3 which can be removed once no users care about - those DBs anymore. - """ - if minorVersion <= 2: - locationTypes, locationData = _packLocationsV1(locations) - elif minorVersion == 3: - locationTypes, locationData = _packLocationsV2(locations) - elif minorVersion > 3: - locationTypes, locationData = _packLocationsV3(locations) - else: - raise ValueError("Unsupported minor version: {}".format(minorVersion)) - return locationTypes, locationData - - -def _packLocationsV1( - locations: List[grids.LocationBase], -) -> Tuple[List[str], List[Tuple[int, int, int]]]: - """Delete when reading v <=3.2 DB's no longer wanted.""" - locTypes = [] - locData: List[Tuple[int, int, int]] = [] - for loc in locations: - locationType = loc.__class__.__name__ - if loc is None: - locationType = "None" - locDatum = [(0.0, 0.0, 0.0)] - elif isinstance(loc, grids.IndexLocation): - locDatum = [loc.indices] - else: - raise ValueError(f"Invalid location type: {loc}") - - locTypes.append(locationType) - locData.extend(locDatum) - - return locTypes, locData - - -def _packLocationsV2( - locations: List[grids.LocationBase], -) -> Tuple[List[str], List[Tuple[int, int, int]]]: - """ - Location packing implementation for minor version 3. See release notes above. 
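A rough illustration of the packed form (values are made up; the single-character labels come from ``LOCATION_TYPE_LABELS``)::

    # Illustrative only: one None location, one IndexLocation, and one
    # MultiIndexLocation holding three sub-locations.
    locTypes = ["N", "I", "M:3"]
    locData = [
        (0.0, 0.0, 0.0),                   # placeholder datum for the None location
        (1, 2, 0),                         # the IndexLocation indices
        (0, 0, 0), (1, 0, 0), (2, 0, 0),   # three sub-location index triples
    ]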
- """ - locTypes = [] - locData: List[Tuple[int, int, int]] = [] - for loc in locations: - locationType = LOCATION_TYPE_LABELS[type(loc)] - if loc is None: - locDatum = [(0.0, 0.0, 0.0)] - elif loc.__class__ is grids.CoordinateLocation: - locDatum = [loc.indices] - elif loc.__class__ is grids.IndexLocation: - locDatum = [loc.indices] - elif loc.__class__ is grids.MultiIndexLocation: - # encode number of sub-locations to allow in-line unpacking. - locationType += f"{len(loc)}" - locDatum = [subloc.indices for subloc in loc] - else: - raise ValueError(f"Invalid location type: {loc}") - - locTypes.append(locationType) - locData.extend(locDatum) - - return locTypes, locData - - -def _packLocationsV3( - locations: List[grids.LocationBase], -) -> Tuple[List[str], List[Tuple[int, int, int]]]: - """ - Location packing implementation for minor version 4. See release notes above. - """ - locTypes = [] - locData: List[Tuple[int, int, int]] = [] - - for loc in locations: - locationType = LOCATION_TYPE_LABELS[type(loc)] - if loc is None: - locDatum = [(0.0, 0.0, 0.0)] - elif type(loc) is grids.IndexLocation: - locDatum = [loc.getCompleteIndices()] - elif type(loc) is grids.CoordinateLocation: - # CoordinateLocations do not implement getCompleteIndices properly, and we - # do not really have a motivation to store them as we do with index - # locations. - locDatum = [loc.indices] - elif type(loc) is grids.MultiIndexLocation: - locationType += f"{len(loc)}" - locDatum = [subloc.indices for subloc in loc] - else: - raise ValueError(f"Invalid location type: {loc}") - - locTypes.append(locationType) - locData.extend(locDatum) - - return locTypes, locData - - -def _unpackLocations(locationTypes, locData, minorVersion: int = DB_MINOR): - """ - Convert location data as read from DB back into data structure for building reactor model. - - location and locationType will only have different lengths when multiindex locations - are used. - """ - if minorVersion < 3: - return _unpackLocationsV1(locationTypes, locData) - else: - return _unpackLocationsV2(locationTypes, locData) - - -def _unpackLocationsV1(locationTypes, locData): - """Delete when reading v <=3.2 DB's no longer wanted.""" - locsIter = iter(locData) - unpackedLocs = [] - for lt in locationTypes: - if lt == "None": - loc = next(locsIter) - unpackedLocs.append(None) - elif lt == "IndexLocation": - loc = next(locsIter) - # the data is stored as float, so cast back to int - unpackedLocs.append(tuple(int(i) for i in loc)) - else: - loc = next(locsIter) - unpackedLocs.append(tuple(loc)) - return unpackedLocs - - -def _unpackLocationsV2(locationTypes, locData): - """ - Location unpacking implementation for minor version 3+. See release notes above. - """ - locsIter = iter(locData) - unpackedLocs = [] - for lt in locationTypes: - if lt == LOC_NONE: - loc = next(locsIter) - unpackedLocs.append(None) - elif lt == LOC_INDEX: - loc = next(locsIter) - # the data is stored as float, so cast back to int - unpackedLocs.append(tuple(int(i) for i in loc)) - elif lt == LOC_COORD: - loc = next(locsIter) - unpackedLocs.append(tuple(loc)) - elif lt.startswith(LOC_MULTI): - # extract number of sublocations from e.g. "M:345" string. - numSubLocs = int(lt.split(":")[1]) - multiLocs = [] - for _ in range(numSubLocs): - subLoc = next(locsIter) - # All multiindexes sublocs are index locs - multiLocs.append(tuple(int(i) for i in subLoc)) - unpackedLocs.append(multiLocs) - else: - raise ValueError(f"Read unknown location type {lt}. 
Invalid DB.") - - return unpackedLocs - - -class Layout: - """ - The Layout class describes the hierarchical layout of the composite Reactor model in a flat representation. - - A Layout is built up by starting at the root of a composite tree and recursively - appending each node in the tree to the list of data. So for a typical Reactor model, - the data will be ordered by depth-first search: [r, c, a1, a1b1, a1b1c1, a1b1c2, a1b2, - a1b2c1, ..., a2, ...]. - - The layout is also responsible for storing Component attributes, like location, - material, and temperatures (from blueprints), which aren't stored as Parameters. - Temperatures, specifically, are rather complicated beasts in ARMI, and more - fundamental changes to how we deal with them may allow us to remove them from - Layout. - - Notes - ----- - As this format is liable to be consumed by other code, it is important to specify - its structure so that code attempting to read/write Layouts can make safe - assumptions. Below is a list of things to be aware of. More will be added as issues - arise or things become more precise: - - * Elements in Layout are stored in depth-first order. This permits use of - algorithms such as Pre-Order Tree Traversal for efficient traversal of regions of - the model. - - * ``indexInData`` increases monotonically within each object ``type``. This means - that, for instance, the data for all ``HexBlock`` children of a given parent - are stored contiguously within the ``HexBlock`` group, and will not be - interleaved with data from the ``HexBlock`` children of any of the parent's - siblings. - - * Aside from the hierarchy itself, there is no guarantee what order objects are - stored in the layout. "`The` ``Core``" is not necessarily the first child of the - ``Reactor``, and is not guaranteed to use the zeroth grid. - """ - - def __init__(self, version: Tuple[int, int], h5group=None, comp=None): - self.type: List[str] = [] - self.name: List[str] = [] - self.serialNum: List[int] = [] - # The index into the parameter datasets corresponding to each object's class. - # E.g., the 5th HexBlock object in the tree would get 5; to look up its - # "someParameter" value, you would extract cXXnYY/HexBlock/someParameter[5]. - self.indexInData: List[int] = [] - # The number of direct children this object has. - self.numChildren: List[int] = [] - # The type of location that specifies the object's physical location; see the - # associated pack/unpackLocation functions for more information about how - # locations are handled. - self.locationType: List[str] = [] - # There is a minor asymmetry here in that before writing to the DB, this is - # truly a flat list of tuples. However when reading, this may contain lists of - # tuples, which represent MI locations. This comes from the fact that we map the - # tuples to Location objects in Database3._compose, but map from Locations to - # tuples in Layout._createLayout. Ideally we would handle both directions in the - # same place so this can be less surprising. Resolving this would require - # changing the interface of the various pack/unpack functions, which have - # multiple versions, so the update would need to be done with care. - self.location: List[Tuple[int, int, int]] = [] - # Which grid, as stored in the database, this object uses to arrange its - # children - self.gridIndex: List[int] = [] - self.temperatures: List[float] = [] - self.material: List[str] = [] - # Used to cache all of the spatial locators so that we can pack them all at - # once. 
The benefit here is that the version checking can happen up front and - # less branching down below - self._spatialLocators: List[grids.LocationBase] = [] - # set of grid parameters that have been seen in _createLayout. For efficient - # checks for uniqueness - self._seenGridParams: Dict[Any, Any] = dict() - # actual list of grid parameters, with stable order for safe indexing - self.gridParams: List[Any] = [] - self.version = version - - self.groupedComps: Dict[ - Type[ArmiObject], List[ArmiObject] - ] = collections.defaultdict(list) - - # it should be noted, one of the two inputs must be non-None: comp/h5group - if comp is not None: - self._createLayout(comp) - self.locationType, self.location = _packLocations(self._spatialLocators) - else: - self._readLayout(h5group) - - self._snToLayoutIndex = {sn: i for i, sn in enumerate(self.serialNum)} - - def __getitem__(self, sn): - layoutIndex = self._snToLayoutIndex[sn] - return ( - self.type[layoutIndex], - self.name[layoutIndex], - self.serialNum[layoutIndex], - self.indexInData[layoutIndex], - self.numChildren[layoutIndex], - self.locationType[layoutIndex], - self.location[layoutIndex], - self.temperatures[layoutIndex], - self.material[layoutIndex], - ) - - def _createLayout(self, comp): - """ - Populate a hierarchical representation and group the reactor model items by type. - - This is used when writing a reactor model to the database. - - Notes - ----- - This is recursive. - - See Also - -------- - _readLayout : does the opposite - """ - compList = self.groupedComps[type(comp)] - compList.append(comp) - - self.type.append(comp.__class__.__name__) - self.name.append(comp.name) - self.serialNum.append(comp.p.serialNum) - self.indexInData.append(len(compList) - 1) - self.numChildren.append(len(comp)) - - # determine how many components have been read in, to set the grid index - if comp.spatialGrid is not None: - gridType = type(comp.spatialGrid).__name__ - gridParams = (gridType, comp.spatialGrid.reduce()) - if gridParams not in self._seenGridParams: - self._seenGridParams[gridParams] = len(self.gridParams) - self.gridParams.append(gridParams) - self.gridIndex.append(self._seenGridParams[gridParams]) - else: - self.gridIndex.append(None) - - self._spatialLocators.append(comp.spatialLocator) - - # set the materials and temperatures - try: - self.temperatures.append((comp.inputTemperatureInC, comp.temperatureInC)) - self.material.append(comp.material.__class__.__name__) - except: - self.temperatures.append((-900, -900)) # an impossible temperature - self.material.append("") - - try: - comps = sorted(list(comp)) - except ValueError: - runLog.error( - "Failed to sort some collection of ArmiObjects for database output: {} " - "value {}".format(type(comp), list(comp)) - ) - raise - - # depth-first search recursion of all components - for c in comps: - self._createLayout(c) - - def _readLayout(self, h5group): + @staticmethod + def _writeAttrs(obj, group, attrs): """ - Populate a hierarchical representation and group the reactor model items by type. - - This is used when reading a reactor model from a database. - - See Also - -------- - _createLayout : does the opposite + Handle safely writing attributes to a dataset, handling large data if necessary. + + This will attempt to store attributes directly onto an HDF5 object if possible, + falling back to proper datasets and reference attributes if necessary. This is + needed because HDF5 tries to fit attributes into the object header, which has + limited space. 
If an attribute is too large, h5py raises a RuntimeError. + In such cases, this will store the attribute data in a proper dataset and + place a reference to that dataset in the attribute instead. + + In practice, this takes ``linkedDims`` attrs from a particular component type (like + ``c00n00/Circle/id``) and stores them in new datasets (like + ``c00n00/attrs/1_linkedDims``, ``c00n00/attrs/2_linkedDims``) and then sets the + object's attrs to links to those datasets. """ - try: - # location is either an index, or a point - # iter over list is faster - locations = h5group["layout/location"][:].tolist() - self.locationType = numpy.char.decode( - h5group["layout/locationType"][:] - ).tolist() - self.location = _unpackLocations( - self.locationType, locations, self.version[1] - ) - self.type = numpy.char.decode(h5group["layout/type"][:]) - self.name = numpy.char.decode(h5group["layout/name"][:]) - self.serialNum = h5group["layout/serialNum"][:] - self.indexInData = h5group["layout/indexInData"][:] - self.numChildren = h5group["layout/numChildren"][:] - self.material = numpy.char.decode(h5group["layout/material"][:]) - self.temperatures = h5group["layout/temperatures"][:] - self.gridIndex = replaceNonsenseWithNones( - h5group["layout/gridIndex"][:], "layout/gridIndex" - ) - - gridGroup = h5group["layout/grids"] - gridTypes = [t.decode() for t in gridGroup["type"][:]] - - self.gridParams = [] - for iGrid, gridType in enumerate(gridTypes): - thisGroup = gridGroup[str(iGrid)] - - unitSteps = thisGroup["unitSteps"][:] - bounds = [] - for ibound in range(3): - boundName = "bounds_{}".format(ibound) - if boundName in thisGroup: - bounds.append(thisGroup[boundName][:]) - else: - bounds.append(None) - unitStepLimits = thisGroup["unitStepLimits"][:] - offset = thisGroup["offset"][:] if thisGroup.attrs["offset"] else None - geomType = ( - thisGroup["geomType"].asstr()[()] - if "geomType" in thisGroup - else None - ) - symmetry = ( - thisGroup["symmetry"].asstr()[()] - if "symmetry" in thisGroup - else None - ) - - self.gridParams.append( - ( - gridType, - grids.GridParameters( - unitSteps, - bounds, - unitStepLimits, - offset, - geomType, - symmetry, - ), - ) - ) - - except KeyError as e: - runLog.error( - "Failed to get layout information from group: {}".format(h5group.name) - ) - raise e - - def _initComps(self, caseTitle, bp): - comps = [] - groupedComps = collections.defaultdict(list) - - for ( - compType, - name, - serialNum, - numChildren, - location, - material, - temperatures, - gridIndex, - ) in zip( - self.type, - self.name, - self.serialNum, - self.numChildren, - self.location, - self.material, - self.temperatures, - self.gridIndex, - ): - Klass = ArmiObject.TYPES[compType] - - if issubclass(Klass, Reactor): - comp = Klass(caseTitle, bp) - elif issubclass(Klass, Core): - comp = Klass(name) - elif issubclass(Klass, Component): - # XXX: initialize all dimensions to 0, they will be loaded and assigned - # after load - kwargs = dict.fromkeys(Klass.DIMENSION_NAMES, 0) - kwargs["material"] = material - kwargs["name"] = name - kwargs["Tinput"] = temperatures[0] - kwargs["Thot"] = temperatures[1] - comp = Klass(**kwargs) - else: - comp = Klass(name) + for key, value in attrs.items(): + try: + obj.attrs[key] = value + except RuntimeError as err: + if "object header message is too large" not in err.args[0]: + raise - if gridIndex is not None: - gridParams = self.gridParams[gridIndex] - comp.spatialGrid = GRID_CLASSES[gridParams[0]]( - *gridParams[1], armiObject=comp + runLog.info( + "Storing attribute 
`{}` for `{}` into it's own dataset within " + "`{}/attrs`".format(key, obj, group) ) - comps.append((comp, serialNum, numChildren, location)) - groupedComps[compType].append(comp) - - return comps, groupedComps - - def writeToDB(self, h5group): - if "layout/type" in h5group: - # It looks like we have already written the layout to DB, skip for now - return - try: - h5group.create_dataset( - "layout/type", - data=numpy.array(self.type).astype("S"), - compression="gzip", - ) - h5group.create_dataset( - "layout/name", - data=numpy.array(self.name).astype("S"), - compression="gzip", - ) - h5group.create_dataset( - "layout/serialNum", data=self.serialNum, compression="gzip" - ) - h5group.create_dataset( - "layout/indexInData", data=self.indexInData, compression="gzip" - ) - h5group.create_dataset( - "layout/numChildren", data=self.numChildren, compression="gzip" - ) - h5group.create_dataset( - "layout/location", data=self.location, compression="gzip" - ) - h5group.create_dataset( - "layout/locationType", - data=numpy.array(self.locationType).astype("S"), - compression="gzip", - ) - h5group.create_dataset( - "layout/material", - data=numpy.array(self.material).astype("S"), - compression="gzip", - ) - h5group.create_dataset( - "layout/temperatures", data=self.temperatures, compression="gzip" - ) - - h5group.create_dataset( - "layout/gridIndex", - data=replaceNonesWithNonsense( - numpy.array(self.gridIndex), "layout/gridIndex" - ), - compression="gzip", - ) - - gridsGroup = h5group.create_group("layout/grids") - gridsGroup.attrs["nGrids"] = len(self.gridParams) - gridsGroup.create_dataset( - "type", data=numpy.array([gp[0] for gp in self.gridParams]).astype("S") - ) - - for igrid, gridParams in enumerate(gp[1] for gp in self.gridParams): - thisGroup = gridsGroup.create_group(str(igrid)) - thisGroup.create_dataset("unitSteps", data=gridParams.unitSteps) - - for ibound, bound in enumerate(gridParams.bounds): - if bound is not None: - bound = numpy.array(bound) - thisGroup.create_dataset("bounds_{}".format(ibound), data=bound) - - thisGroup.create_dataset( - "unitStepLimits", data=gridParams.unitStepLimits - ) + if "attrs" not in group: + attrGroup = group.create_group("attrs") + else: + attrGroup = group["attrs"] + dataName = str(len(attrGroup)) + "_" + key + attrGroup[dataName] = value - offset = gridParams.offset - thisGroup.attrs["offset"] = offset is not None - if offset is not None: - thisGroup.create_dataset("offset", data=offset) - thisGroup.create_dataset("geomType", data=gridParams.geomType) - thisGroup.create_dataset("symmetry", data=gridParams.symmetry) - except RuntimeError: - runLog.error("Failed to create datasets in: {}".format(h5group)) - raise + # using a soft link here allows us to cheaply copy time nodes without + # needing to crawl through and update object references. + linkName = attrGroup[dataName].name + obj.attrs[key] = "@{}".format(linkName) @staticmethod - def computeAncestors(serialNum, numChildren, depth=1) -> List[Optional[int]]: + def _resolveAttrs(attrs, group): """ - Return a list containing the serial number of the parent corresponding to each - object at the given depth. - - Depth in this case means how many layers to reach up to find the desired - ancestor. A depth of 1 will yield the direct parent of each element, depth of 2 - would yield the elemen's parent's parent, and so on. - - The zero-th element will always be None, as the first object is the root element - and so has no parent. Subsequent depths will result in more Nones. 
+ Reverse the action of _writeAttrs. - This function is useful for forming a lightweight sense of how the database - contents stitch together, without having to go to the trouble of fully unpacking - the Reactor model. - - Parameters - ---------- - serialNum : List of int - List of serial numbers for each object/element, as laid out in Layout - numChildren : List of int - List of numbers of children for each object/element, as laid out in Layout - - Note - ---- - This is not using a recursive approach for a couple of reasons. First, the - iterative form isn't so bad; we just need two stacks. Second, the interface of - the recursive function would be pretty unwieldy. We are progressively - consuming two lists, of which we would need to keep passing down with an - index/cursor, or progressively slice them as we go, which would be pretty - inefficient. + This reads actual attrs and looks for the real data + in the datasets that the attrs were pointing to. """ - ancestors: List[Optional[int]] = [None] - - snStack = [serialNum[0]] - ncStack = [numChildren[0]] - - for sn, nc in zip(serialNum[1:], numChildren[1:]): - ncStack[-1] -= 1 - if nc > 0: - ancestors.append(snStack[-1]) - snStack.append(sn) - ncStack.append(nc) - else: - ancestors.append(snStack[-1]) - - while ncStack and ncStack[-1] == 0: - snStack.pop() - ncStack.pop() - - if depth > 1: - # handle deeper scenarios. This is a bit tricky. Store the original - # ancestors for the first generation, since that ultimately contains all of - # the information that we need. Then in a loop, keep hopping one more layer - # of indirection, and indexing into the corresponding locaition in the - # original ancestor array - indexMap = {sn: i for i, sn in enumerate(serialNum)} - origAncestors = ancestors - for _ in range(depth - 1): - ancestors = [ - origAncestors[indexMap[ia]] if ia is not None else None - for ia in ancestors - ] - - return ancestors - - -def allSubclasses(cls): - """This currently include Materials... and it should not.""" - return set(cls.__subclasses__()).union( - [s for c in cls.__subclasses__() for s in allSubclasses(c)] - ) + attr_link = re.compile("^@(.*)$") + resolved = {} + for key, val in attrs.items(): + try: + if isinstance(val, h5py.h5r.Reference): + # Old style object reference. If this cannot be dereferenced, it is + # likely because mergeHistory was used to get the current database, + # which does not preserve references. + resolved[key] = group[val] + elif isinstance(val, str): + m = attr_link.match(val) + if m: + # dereference the path to get the data out of the dataset. + resolved[key] = group[m.group(1)][()] + else: + resolved[key] = val + else: + resolved[key] = val + except ValueError: + runLog.error(f"HDF error loading {key} : {val}\nGroup: {group}") + raise -# TODO: This will likely become an issue with extensibility via plugins. There are a -# couple of options to resolve this: -# - Perform this operation each time we make a Layout. Wasteful, but robust -# - Scrape all of these names off of a set of Composites that register with a base -# metaclass. Less wasteful, but probably equally robust. Downside is it's metaclassy -# and Madjickal. 
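A small worked illustration of ``Layout.computeAncestors`` from above, using hypothetical serial numbers for a depth-first layout ``r -> (a -> c), b``::

    serialNum = [1, 2, 3, 4]      # r, a, c, b in depth-first order
    numChildren = [2, 1, 0, 0]    # r has two children, a has one
    Layout.computeAncestors(serialNum, numChildren)
    # returns [None, 1, 2, 1]: r has no parent, a and b hang off r, c off a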
-GRID_CLASSES = {c.__name__: c for c in allSubclasses(grids.Grid)} -GRID_CLASSES["Grid"] = grids.Grid - - -NONE_MAP = {float: float("nan"), str: ""} - -# XXX: we're going to assume no one assigns min(int)+2 as a meaningful value -NONE_MAP.update( - { - intType: numpy.iinfo(intType).min + 2 - for intType in ( - int, - numpy.int8, - numpy.int16, - numpy.int32, - numpy.int64, - ) - } -) -NONE_MAP.update( - { - intType: numpy.iinfo(intType).max - 2 - for intType in ( - numpy.uint, - numpy.uint8, - numpy.uint16, - numpy.uint32, - numpy.uint64, - ) - } -) -NONE_MAP.update({floatType: floatType("nan") for floatType in (float, numpy.float64)}) + return resolved def packSpecialData( @@ -2492,9 +1495,7 @@ def packSpecialData( See Also -------- unpackSpecialData - """ - # Check to make sure that we even need to do this. If the numpy data type is # not "O", chances are we have nice, clean data. if data.dtype != "O": @@ -2680,250 +1681,6 @@ def unpackSpecialData(data: numpy.ndarray, attrs, paramName: str) -> numpy.ndarr ) -def replaceNonsenseWithNones(data: numpy.ndarray, paramName: str) -> numpy.ndarray: - """ - Replace special nonsense values with ``None``. - - This essentially reverses the operations performed by - :py:func:`replaceNonesWithNonsense`. - - Parameters - ---------- - data - The array from the database that contains special ``None`` nonsense values. - - paramName - The param name who's data we are dealing with. Only used for diagnostics. - - See Also - -------- - replaceNonesWithNonsense - """ - # TODO: This is super closely-related to the NONE_MAP collection, and should - # probably use it somehow. - if numpy.issubdtype(data.dtype, numpy.floating): - isNone = numpy.isnan(data) - elif numpy.issubdtype(data.dtype, numpy.integer): - isNone = data == numpy.iinfo(data.dtype).min + 2 - elif numpy.issubdtype(data.dtype, numpy.str_): - isNone = data == "" - else: - raise TypeError( - "Unable to resolve values that should be None for `{}`".format(paramName) - ) - - if data.ndim > 1: - result = numpy.ndarray(data.shape[0], dtype=numpy.dtype("O")) - for i in range(data.shape[0]): - if isNone[i].all(): - result[i] = None - elif isNone[i].any(): - # TODO: This is not symmetric with the replaceNonesWithNonsense impl. - # That one assumes that Nones apply only at the highest dimension, and - # that the lower dimensions will be filled with the magic None value. - # Non-none entries below the top level fail to coerce to a serializable - # numpy array and would raise an exception when trying to write. TL;DR: - # this is a dead branch until the replaceNonesWithNonsense impl is more - # sophisticated. - result[i] = numpy.array(data[i], dtype=numpy.dtype("O")) - result[i][isNone[i]] = None - else: - result[i] = data[i] - - else: - result = numpy.ndarray(data.shape, dtype=numpy.dtype("O")) - result[:] = data - result[isNone] = None - - return result - - -def replaceNonesWithNonsense( - data: numpy.ndarray, paramName: str, nones: numpy.ndarray = None -) -> numpy.ndarray: - """ - Replace instances of ``None`` with nonsense values that can be detected/recovered - when reading. - - Parameters - ---------- - data - The numpy array containing ``None`` values that need to be replaced. - - paramName - The name of the parameter who's data we are treating. Only used for diagnostics. - - nones - An array containing the index locations on the ``None`` elements. 
It is a little - strange to pass these, in but we find these indices to determine whether we need - to call this function in the first place, so might as well pass it in, so that - we don't need to perform the operation again. - - Notes - ----- - This only supports situations where the data is a straight-up ``None``, or a valid, - database-storable numpy array (or easily convertable to one (e.g. tuples/lists with - numerical values)). This does not support, for instance, a numpy ndarray with some - Nones in it. - - For example, the following is supported:: - - [[1, 2, 3], None, [7, 8, 9]] - - However, the following is not:: - - [[1, 2, 3], [4, None, 6], [7, 8, 9]] - - See Also - -------- - replaceNonsenseWithNones - Reverses this operation. - """ - if nones is None: - nones = numpy.where([d is None for d in data])[0] - - try: - # loop to find what the default value should be. This is the first non-None - # value that we can find. - defaultValue = None - realType = None - val = None - - for val in data: - if isinstance(val, numpy.ndarray): - # if multi-dimensional, val[0] could still be an array, val.flat is - # a flattened iterator, so next(val.flat) gives the first value in - # an n-dimensional array - realType = type(next(val.flat)) - - if realType is type(None): - continue - - defaultValue = numpy.reshape( - numpy.repeat(NONE_MAP[realType], val.size), val.shape - ) - break - else: - realType = type(val) - - if realType is type(None): - continue - - defaultValue = NONE_MAP[realType] - break - else: - # Couldn't find any non-None entries, so it really doesn't matter what type we - # use. Using float, because NaN is nice. - realType = float - defaultValue = NONE_MAP[realType] - - if isinstance(val, numpy.ndarray): - data = numpy.array([d if d is not None else defaultValue for d in data]) - else: - data[nones] = defaultValue - - except Exception as ee: - runLog.error( - "Error while attempting to determine default for {}.\nvalue: {}\nError: {}".format( - paramName, val, ee - ) - ) - raise TypeError( - "Could not determine None replacement for {} with type {}, val {}, default {}".format( - paramName, realType, val, defaultValue - ) - ) - - try: - data = data.astype(realType) - except: - raise ValueError( - "Could not coerce data for {} to {}, data:\n{}".format( - paramName, realType, data - ) - ) - - if data.dtype.kind == "O": - raise TypeError( - "Failed to convert data to valid HDF5 type {}, data:{}".format( - paramName, data - ) - ) - - return data - - -def _writeAttrs(obj, group, attrs): - """ - Handle safely writing attributes to a dataset, handling large data if necessary. - - This will attempt to store attributes directly onto an HDF5 object if possible, - falling back to proper datasets and reference attributes if necessary. This is - needed because HDF5 tries to fit attributes into the object header, which has - limited space. If an attribute is too large, h5py raises a RuntimeError. - In such cases, this will store the attribute data in a proper dataset and - place a reference to that dataset in the attribute instead. - - In practice, this takes ``linkedDims`` attrs from a particular component type (like - ``c00n00/Circle/id``) and stores them in new datasets (like - ``c00n00/attrs/1_linkedDims``, ``c00n00/attrs/2_linkedDims``) and then sets the - object's attrs to links to those datasets. 
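The ``None``-replacement strategy described above can be pictured with a tiny example (illustrative values; the real sentinels come from ``NONE_MAP``)::

    import numpy

    # In [[1, 2, 3], None, [7, 8, 9]], the None entry is replaced by a full row
    # of the integer sentinel so the data can be stored as a rectangular array;
    # replaceNonsenseWithNones reverses this on read.
    sentinel = numpy.iinfo(numpy.int64).min + 2
    stored = numpy.array([[1, 2, 3], [sentinel] * 3, [7, 8, 9]])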
- """ - for key, value in attrs.items(): - try: - obj.attrs[key] = value - except RuntimeError as err: - if "object header message is too large" not in err.args[0]: - raise - - runLog.info( - "Storing attribute `{}` for `{}` into it's own dataset within " - "`{}/attrs`".format(key, obj, group) - ) - - if "attrs" not in group: - attrGroup = group.create_group("attrs") - else: - attrGroup = group["attrs"] - dataName = str(len(attrGroup)) + "_" + key - attrGroup[dataName] = value - - # using a soft link here allows us to cheaply copy time nodes without - # needing to crawl through and update object references. - linkName = attrGroup[dataName].name - obj.attrs[key] = "@{}".format(linkName) - - -def _resolveAttrs(attrs, group): - """ - Reverse the action of _writeAttrs. - - This reads actual attrs and looks for the real data - in the datasets that the attrs were pointing to. - """ - resolved = {} - for key, val in attrs.items(): - try: - if isinstance(val, h5py.h5r.Reference): - # Old style object reference. If this cannot be dereferenced, it is - # likely because mergeHistory was used to get the current database, - # which does not preserve references. - resolved[key] = group[val] - elif isinstance(val, str): - m = ATTR_LINK.match(val) - if m: - # dereference the path to get the data out of the dataset. - resolved[key] = group[m.group(1)][()] - else: - resolved[key] = val - else: - resolved[key] = val - except ValueError: - runLog.error(f"HDF error loading {key} : {val}\nGroup: {group}") - raise - return resolved - - def collectBlockNumberDensities(blocks) -> Dict[str, numpy.ndarray]: """ Collect block-by-block homogenized number densities for each nuclide. diff --git a/armi/bookkeeping/db/databaseInterface.py b/armi/bookkeeping/db/databaseInterface.py new file mode 100644 index 000000000..bebe0c9c0 --- /dev/null +++ b/armi/bookkeeping/db/databaseInterface.py @@ -0,0 +1,421 @@ +# Copyright 2022 TerraPower, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +The database interface provides a way to save the reactor state to a file, throughout +a simulation. 
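A rough usage sketch (``r`` and ``cs`` are assumed to come from an existing ARMI case; the file name is illustrative)::

    from armi.bookkeeping.db.databaseInterface import DatabaseInterface

    dbi = DatabaseInterface(r, cs)
    dbi.initDB("mycase.h5")        # open the HDF5 file and store the inputs
    dbi.database.writeToDB(r)      # snapshot the current reactor state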
+""" +import copy +import os +import pathlib +import time +from typing import ( + Optional, + Tuple, + Sequence, + MutableSequence, +) + +from armi import context +from armi import interfaces +from armi import runLog +from armi.bookkeeping.db.database3 import Database3, getH5GroupName +from armi.reactor.parameters import parameterDefinitions +from armi.reactor.composites import ArmiObject +from armi.bookkeeping.db.typedefs import History, Histories +from armi.utils import getPreviousTimeNode, getStepLengths +from armi.settings.fwSettings.databaseSettings import ( + CONF_SYNC_AFTER_WRITE, + CONF_FORCE_DB_PARAMS, +) + + +ORDER = interfaces.STACK_ORDER.BOOKKEEPING + + +def describeInterfaces(cs): + """Function for exposing interface(s) to other code""" + return (DatabaseInterface, {"enabled": cs["db"]}) + + +class DatabaseInterface(interfaces.Interface): + """ + Handles interactions between the ARMI data model and the persistent data storage + system. + + This reads/writes the ARMI state to/from the database and helps derive state + information that can be derived. + """ + + name = "database" + + def __init__(self, r, cs): + interfaces.Interface.__init__(self, r, cs) + self._db = None + self._dbPath: Optional[pathlib.Path] = None + + if cs[CONF_FORCE_DB_PARAMS]: + toSet = {paramName: set() for paramName in cs[CONF_FORCE_DB_PARAMS]} + for (name, _), pDef in parameterDefinitions.ALL_DEFINITIONS.items(): + if name in toSet.keys(): + toSet[name].add(pDef) + + for name, pDefs in toSet.items(): + runLog.info( + "Forcing parameter {} to be written to the database, per user " + "input".format(name) + ) + for pDef in pDefs: + pDef.saveToDB = True + + def __repr__(self): + return "<{} '{}' {} >".format( + self.__class__.__name__, self.name, repr(self._db) + ) + + @property + def database(self): + """ + Presents the internal database object, if it exists. + """ + if self._db is not None: + return self._db + else: + raise RuntimeError( + "The Database interface has not yet created a database " + "object. InteractBOL or loadState must be called first." + ) + + def interactBOL(self): + """Initialize the database if the main interface was not available. (Begining of Life)""" + if not self._db: + self.initDB() + + def initDB(self, fName: Optional[os.PathLike] = None): + """ + Open the underlying database to be written to, and write input files to DB. + + Notes + ----- + Main Interface calls this so that the database is available as early as + possible in the run. The database interface interacts near the end of the + interface stack (so that all the parameters have been updated) while the Main + Interface interacts first. + """ + if fName is None: + self._dbPath = pathlib.Path(self.cs.caseTitle + ".h5") + else: + self._dbPath = pathlib.Path(fName) + + if self.cs["reloadDBName"].lower() == str(self._dbPath).lower(): + raise ValueError( + "It appears that reloadDBName is the same as the case " + "title. This could lead to data loss! Rename the reload DB or the " + "case." + ) + self._db = Database3(self._dbPath, "w") + self._db.open() + + # Grab geomString here because the DB-level has no access to the reactor or + # blueprints or anything. + # There's not always a geomFile; we are moving towards the core grid definition + # living in the blueprints themselves. In this case, the db doesnt need to store + # a geomFile at all. 
+ if self.cs["geomFile"]: + with open(os.path.join(self.cs.inputDirectory, self.cs["geomFile"])) as f: + geomString = f.read() + else: + geomString = "" + self._db.writeInputsToDB(self.cs, geomString=geomString) + + def interactEveryNode(self, cycle, node): + """ + Write to database. + + DBs should receive the state information of the run at each node. + """ + # skip writing for last burn step since it will be written at interact EOC + if node < self.o.burnSteps[cycle]: + self.r.core.p.minutesSinceStart = ( + time.time() - self.r.core.timeOfStart + ) / 60.0 + self._db.writeToDB(self.r) + if self.cs[CONF_SYNC_AFTER_WRITE]: + self._db.syncToSharedFolder() + + def interactEOC(self, cycle=None): + """In case anything changed since last cycle (e.g. rxSwing), update DB. (End of Cycle)""" + # We cannot presume whether we are at EOL based on cycle and cs["nCycles"], + # since cs["nCycles"] is not a difinitive indicator of EOL; ultimately the + # Operator has the final say. + if not self.o.atEOL: + self.r.core.p.minutesSinceStart = ( + time.time() - self.r.core.timeOfStart + ) / 60.0 + self._db.writeToDB(self.r) + + def interactEOL(self): + """DB's should be closed at run's end. (End of Life)""" + # minutesSinceStarts should include as much of the ARMI run as possible so EOL + # is necessary, too. + self.r.core.p.minutesSinceStart = (time.time() - self.r.core.timeOfStart) / 60.0 + self._db.writeToDB(self.r) + self._db.close(True) + + def interactError(self): + r"""Get shutdown state information even if the run encounters an error""" + try: + self.r.core.p.minutesSinceStart = ( + time.time() - self.r.core.timeOfStart + ) / 60.0 + + # this can result in a double-error if the error occurred in the database + # writing + self._db.writeToDB(self.r, "error") + self._db.close(False) + except: # pylint: disable=bare-except; we're already responding to an error + pass + + def interactDistributeState(self) -> None: + """ + Reconnect to pre-existing database. + + DB is created and managed by the primary node only but we can still connect to it + from workers to enable things like history tracking. + """ + if context.MPI_RANK > 0: + # DB may not exist if distribute state is called early. + if self._dbPath is not None and os.path.exists(self._dbPath): + self._db = Database3(self._dbPath, "r") + self._db.open() + + def distributable(self): + return self.Distribute.SKIP + + def prepRestartRun(self): + """ + Load the data history from the database requested in the case setting + `reloadDBName`. + + Reactor state is put at the cycle/node requested in the case settings + `startCycle` and `startNode`, having loaded the state from all cycles prior + to that in the requested database. + + Notes + ----- + Mixing the use of simple vs detailed cycles settings is allowed, provided + that the cycle histories prior to `startCycle`/`startNode` are equivalent. + """ + reloadDBName = self.cs["reloadDBName"] + runLog.info( + f"Merging database history from {reloadDBName} for restart analysis." 
+ ) + startCycle = self.cs["startCycle"] + startNode = self.cs["startNode"] + + with Database3(reloadDBName, "r") as inputDB: + loadDbCs = inputDB.loadCS() + + # pull the history up to the cycle/node prior to `startCycle`/`startNode` + dbCycle, dbNode = getPreviousTimeNode( + startCycle, + startNode, + self.cs, + ) + + # check that cycle histories are equivalent up to this point + self._checkThatCyclesHistoriesAreEquivalentUpToRestartTime( + loadDbCs, dbCycle, dbNode + ) + + self._db.mergeHistory(inputDB, startCycle, startNode) + self.loadState(dbCycle, dbNode) + + def _checkThatCyclesHistoriesAreEquivalentUpToRestartTime( + self, loadDbCs, dbCycle, dbNode + ): + dbStepLengths = getStepLengths(loadDbCs) + currentCaseStepLengths = getStepLengths(self.cs) + dbStepHistory = [] + currentCaseStepHistory = [] + try: + for cycleIdx in range(dbCycle + 1): + if cycleIdx == dbCycle: + # truncate it at dbNode + dbStepHistory.append(dbStepLengths[cycleIdx][:dbNode]) + currentCaseStepHistory.append( + currentCaseStepLengths[cycleIdx][:dbNode] + ) + else: + dbStepHistory.append(dbStepLengths[cycleIdx]) + currentCaseStepHistory.append(currentCaseStepLengths[cycleIdx]) + except IndexError: + runLog.error( + f"DB cannot be loaded to this time: cycle={dbCycle}, node={dbNode}" + ) + raise + + if dbStepHistory != currentCaseStepHistory: + raise ValueError( + "The cycle history up to the restart cycle/node must be equivalent." + ) + + # TODO: The use of "yield" here is suspect. + def _getLoadDB(self, fileName): + """ + Return the database to load from in order of preference. + + Notes + ----- + If filename is present only returns one database since specifically instructed + to load from that database. + """ + if fileName is not None: + # only yield 1 database if the file name is specified + if self._db is not None and fileName == self._db._fileName: + yield self._db + elif os.path.exists(fileName): + yield Database3(fileName, "r") + else: + if self._db is not None: + yield self._db + if os.path.exists(self.cs["reloadDBName"]): + yield Database3(self.cs["reloadDBName"], "r") + + def loadState( + self, cycle, timeNode, timeStepName="", fileName=None, updateGlobalAssemNum=True + ): + """ + Loads a fresh reactor and applies it to the Operator. + + Notes + ----- + Will load preferentially from the `fileName` if passed. Otherwise will load from + existing database in memory or `cs["reloadDBName"]` in that order. + + Raises + ------ + RuntimeError + If fileName is specified and that file does not have the time step. + If fileName is not specified and neither the database in memory, nor the + `cs["reloadDBName"]` have the time step specified. 
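+
+        Examples
+        --------
+        A hypothetical restart load (the file name is illustrative)::
+
+            dbi.loadState(cycle=0, timeNode=2, fileName="myRun.h5")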
+ """ + for potentialDatabase in self._getLoadDB(fileName): + with potentialDatabase as loadDB: + if loadDB.hasTimeStep(cycle, timeNode, statePointName=timeStepName): + newR = loadDB.load( + cycle, + timeNode, + statePointName=timeStepName, + cs=self.cs, + allowMissing=True, + updateGlobalAssemNum=updateGlobalAssemNum, + ) + self.o.reattach(newR, self.cs) + break + else: + # reactor was never set so fail + if fileName: + raise RuntimeError( + "Cannot load state from specified file {} @ {}".format( + fileName, getH5GroupName(cycle, timeNode, timeStepName) + ) + ) + raise RuntimeError( + "Cannot load state from @ {}".format( + getH5GroupName(cycle, timeNode, timeStepName) + ) + ) + + def getHistory( + self, + comp: ArmiObject, + params: Optional[Sequence[str]] = None, + timeSteps: Optional[MutableSequence[Tuple[int, int]]] = None, + byLocation: bool = False, + ) -> History: + """ + Get historical parameter values for a single object. + + This is mostly a wrapper around the same function on the ``Database3`` class, + but knows how to return the current value as well. + + See Also + -------- + Database3.getHistory + """ + # make a copy so that we can potentially remove timesteps without affecting the + # caller + timeSteps = copy.copy(timeSteps) + now = (self.r.p.cycle, self.r.p.timeNode) + nowRequested = timeSteps is None + if timeSteps is not None and now in timeSteps: + nowRequested = True + timeSteps.remove(now) + + if byLocation: + history = self.database.getHistoryByLocation(comp, params, timeSteps) + else: + history = self.database.getHistory(comp, params, timeSteps) + + if nowRequested: + for param in params or history.keys(): + if param == "location": + history[param][now] = tuple(comp.spatialLocator.indices) + else: + history[param][now] = comp.p[param] + + return history + + def getHistories( + self, + comps: Sequence[ArmiObject], + params: Optional[Sequence[str]] = None, + timeSteps: Optional[MutableSequence[Tuple[int, int]]] = None, + byLocation: bool = False, + ) -> Histories: + """ + Get historical parameter values for one or more objects. + + This is mostly a wrapper around the same function on the ``Database3`` class, + but knows how to return the current value as well. + + See Also + -------- + Database3.getHistories + """ + now = (self.r.p.cycle, self.r.p.timeNode) + nowRequested = timeSteps is None + if timeSteps is not None: + # make a copy so that we can potentially remove timesteps without affecting + # the caller + timeSteps = copy.copy(timeSteps) + if timeSteps is not None and now in timeSteps: + nowRequested = True + timeSteps.remove(now) + + if byLocation: + histories = self.database.getHistoriesByLocation(comps, params, timeSteps) + else: + histories = self.database.getHistories(comps, params, timeSteps) + + if nowRequested: + for c in comps: + for param in params or histories[c].keys(): + if param == "location": + histories[c][param][now] = c.spatialLocator.indices + else: + histories[c][param][now] = c.p[param] + + return histories diff --git a/armi/bookkeeping/db/layout.py b/armi/bookkeeping/db/layout.py new file mode 100644 index 000000000..a1b8f7da4 --- /dev/null +++ b/armi/bookkeeping/db/layout.py @@ -0,0 +1,878 @@ +# Copyright 2022 TerraPower, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Groundwork for ARMI Database, version 3.4.
+
+When interacting with the database file, the :py:class:`Layout` class is used to help
+map the hierarchical Composite Reactor Model to the flat representation in
+:py:class:`Database3 <armi.bookkeeping.db.database3.Database3>`.
+
+This module also stores packing/unpacking tools to support
+:py:class:`Database3 <armi.bookkeeping.db.database3.Database3>`, as well as database
+versioning information.
+"""
+
+import collections
+from typing import (
+    Optional,
+    Tuple,
+    Type,
+    Dict,
+    Any,
+    List,
+)
+
+import numpy
+
+from armi import runLog
+from armi.reactor.components import Component
+from armi.reactor.composites import ArmiObject
+from armi.reactor import grids
+from armi.reactor.reactors import Core
+from armi.reactor.reactors import Reactor
+
+# Here we store the Database3 version information.
+DB_MAJOR = 3
+DB_MINOR = 4
+DB_VERSION = f"{DB_MAJOR}.{DB_MINOR}"
+
+# CONSTANTS USED TO PACK AND UNPACK DATA
+LOC_NONE = "N"
+LOC_COORD = "C"
+LOC_INDEX = "I"
+LOC_MULTI = "M:"
+
+LOCATION_TYPE_LABELS = {
+    type(None): LOC_NONE,
+    grids.CoordinateLocation: LOC_COORD,
+    grids.IndexLocation: LOC_INDEX,
+    grids.MultiIndexLocation: LOC_MULTI,
+}
+
+# NOTE: Here we assume no one assigns min(int)+2 as a meaningful value
+NONE_MAP = {float: float("nan"), str: ""}
+NONE_MAP.update(
+    {
+        intType: numpy.iinfo(intType).min + 2
+        for intType in (
+            int,
+            numpy.int8,
+            numpy.int16,
+            numpy.int32,
+            numpy.int64,
+        )
+    }
+)
+NONE_MAP.update(
+    {
+        intType: numpy.iinfo(intType).max - 2
+        for intType in (
+            numpy.uint,
+            numpy.uint8,
+            numpy.uint16,
+            numpy.uint32,
+            numpy.uint64,
+        )
+    }
+)
+NONE_MAP.update({floatType: floatType("nan") for floatType in (float, numpy.float64)})
+
+
+class Layout:
+    """
+    The Layout class describes the hierarchical layout of the Composite Reactor model
+    in a flat representation for
+    :py:class:`Database3 <armi.bookkeeping.db.database3.Database3>`.
+
+    A Layout is built by starting at the root of a composite tree and recursively
+    appending each node in the tree to a list of data. So the data will be ordered by
+    depth-first search: [r, c, a1, a1b1, a1b1c1, a1b1c2, a1b2, a1b2c1, ..., a2, ...].
+
+    The layout is also responsible for storing Component attributes, like location,
+    material, and temperatures, which aren't stored as Parameters. Temperatures,
+    specifically, are rather complicated in ARMI.
+
+    Notes
+    -----
+    * Elements in Layout are stored in depth-first order. This permits use of
+      algorithms such as Pre-Order Tree Traversal for efficient traversal of regions
+      of the model.
+
+    * ``indexInData`` increases monotonically within each object ``type``. For
+      example, the data for all ``HexBlock`` children of a given parent are stored
+      contiguously within the ``HexBlock`` group, and will not be interleaved with
+      data from the ``HexBlock`` children of any of the parent's siblings.
+
+    * Aside from the hierarchy, there is no guarantee what order objects are stored
+      in the layout. The ``Core`` is not necessarily the first child of the
+      ``Reactor``, and is not guaranteed to use the zeroth grid.
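+
+    * A Layout is constructed with exactly one of its two optional data sources:
+      ``comp`` when writing (e.g., roughly ``Layout((DB_MAJOR, DB_MINOR), comp=r)``)
+      or ``h5group`` when reading back a time-node group such as ``c00n00``. These
+      calls are illustrative sketches of the intent, not a usage prescription.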
+ """ + + def __init__(self, version: Tuple[int, int], h5group=None, comp=None): + self.type: List[str] = [] + self.name: List[str] = [] + self.serialNum: List[int] = [] + # The index into the parameter datasets corresponding to each object's class. + # E.g., the 5th HexBlock object in the tree would get 5; to look up its + # "someParameter" value, you would extract cXXnYY/HexBlock/someParameter[5]. + self.indexInData: List[int] = [] + # The number of direct children this object has. + self.numChildren: List[int] = [] + # The type of location that specifies the object's physical location; see the + # associated pack/unpackLocation functions for more information about how + # locations are handled. + self.locationType: List[str] = [] + # There is a minor asymmetry here in that before writing to the DB, this is + # truly a flat list of tuples. However when reading, this may contain lists of + # tuples, which represent MI locations. This comes from the fact that we map the + # tuples to Location objects in Database3._compose, but map from Locations to + # tuples in Layout._createLayout. Ideally we would handle both directions in the + # same place so this can be less surprising. Resolving this would require + # changing the interface of the various pack/unpack functions, which have + # multiple versions, so the update would need to be done with care. + self.location: List[Tuple[int, int, int]] = [] + # Which grid, as stored in the database, this object uses to arrange its + # children + self.gridIndex: List[int] = [] + self.temperatures: List[float] = [] + self.material: List[str] = [] + # Used to cache all of the spatial locators so that we can pack them all at + # once. The benefit here is that the version checking can happen up front and + # less branching down below + self._spatialLocators: List[grids.LocationBase] = [] + # set of grid parameters that have been seen in _createLayout. For efficient + # checks for uniqueness + self._seenGridParams: Dict[Any, Any] = dict() + # actual list of grid parameters, with stable order for safe indexing + self.gridParams: List[Any] = [] + self.version = version + + self.groupedComps: Dict[ + Type[ArmiObject], List[ArmiObject] + ] = collections.defaultdict(list) + + # it should be noted, one of the two inputs must be non-None: comp/h5group + if comp is not None: + self._createLayout(comp) + self.locationType, self.location = _packLocations(self._spatialLocators) + else: + self._readLayout(h5group) + + self._snToLayoutIndex = {sn: i for i, sn in enumerate(self.serialNum)} + + # find all subclasses of Grid + self.gridClasses = {c.__name__: c for c in Layout.allSubclasses(grids.Grid)} + self.gridClasses["Grid"] = grids.Grid + + def __getitem__(self, sn): + layoutIndex = self._snToLayoutIndex[sn] + return ( + self.type[layoutIndex], + self.name[layoutIndex], + self.serialNum[layoutIndex], + self.indexInData[layoutIndex], + self.numChildren[layoutIndex], + self.locationType[layoutIndex], + self.location[layoutIndex], + self.temperatures[layoutIndex], + self.material[layoutIndex], + ) + + def _createLayout(self, comp): + """ + Populate a hierarchical representation and group the reactor model items by type. + + This is used when writing a reactor model to the database. + + Notes + ----- + This is recursive. 
+ + See Also + -------- + _readLayout : does the opposite + """ + compList = self.groupedComps[type(comp)] + compList.append(comp) + + self.type.append(comp.__class__.__name__) + self.name.append(comp.name) + self.serialNum.append(comp.p.serialNum) + self.indexInData.append(len(compList) - 1) + self.numChildren.append(len(comp)) + + # determine how many components have been read in, to set the grid index + if comp.spatialGrid is not None: + gridType = type(comp.spatialGrid).__name__ + gridParams = (gridType, comp.spatialGrid.reduce()) + if gridParams not in self._seenGridParams: + self._seenGridParams[gridParams] = len(self.gridParams) + self.gridParams.append(gridParams) + self.gridIndex.append(self._seenGridParams[gridParams]) + else: + self.gridIndex.append(None) + + self._spatialLocators.append(comp.spatialLocator) + + # set the materials and temperatures + try: + self.temperatures.append((comp.inputTemperatureInC, comp.temperatureInC)) + self.material.append(comp.material.__class__.__name__) + except: + self.temperatures.append((-900, -900)) # an impossible temperature + self.material.append("") + + try: + comps = sorted(list(comp)) + except ValueError: + runLog.error( + "Failed to sort some collection of ArmiObjects for database output: {} " + "value {}".format(type(comp), list(comp)) + ) + raise + + # depth-first search recursion of all components + for c in comps: + self._createLayout(c) + + def _readLayout(self, h5group): + """ + Populate a hierarchical representation and group the reactor model items by type. + + This is used when reading a reactor model from a database. + + See Also + -------- + _createLayout : does the opposite + """ + try: + # location is either an index, or a point + # iter over list is faster + locations = h5group["layout/location"][:].tolist() + self.locationType = numpy.char.decode( + h5group["layout/locationType"][:] + ).tolist() + self.location = _unpackLocations( + self.locationType, locations, self.version[1] + ) + self.type = numpy.char.decode(h5group["layout/type"][:]) + self.name = numpy.char.decode(h5group["layout/name"][:]) + self.serialNum = h5group["layout/serialNum"][:] + self.indexInData = h5group["layout/indexInData"][:] + self.numChildren = h5group["layout/numChildren"][:] + self.material = numpy.char.decode(h5group["layout/material"][:]) + self.temperatures = h5group["layout/temperatures"][:] + self.gridIndex = replaceNonsenseWithNones( + h5group["layout/gridIndex"][:], "layout/gridIndex" + ) + + gridGroup = h5group["layout/grids"] + gridTypes = [t.decode() for t in gridGroup["type"][:]] + + self.gridParams = [] + for iGrid, gridType in enumerate(gridTypes): + thisGroup = gridGroup[str(iGrid)] + + unitSteps = thisGroup["unitSteps"][:] + bounds = [] + for ibound in range(3): + boundName = "bounds_{}".format(ibound) + if boundName in thisGroup: + bounds.append(thisGroup[boundName][:]) + else: + bounds.append(None) + unitStepLimits = thisGroup["unitStepLimits"][:] + offset = thisGroup["offset"][:] if thisGroup.attrs["offset"] else None + geomType = ( + thisGroup["geomType"].asstr()[()] + if "geomType" in thisGroup + else None + ) + symmetry = ( + thisGroup["symmetry"].asstr()[()] + if "symmetry" in thisGroup + else None + ) + + self.gridParams.append( + ( + gridType, + grids.GridParameters( + unitSteps, + bounds, + unitStepLimits, + offset, + geomType, + symmetry, + ), + ) + ) + + except KeyError as e: + runLog.error( + "Failed to get layout information from group: {}".format(h5group.name) + ) + raise e + + def _initComps(self, caseTitle, bp): 
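+        """
+        Construct un-parented ARMI objects for each entry in the layout.
+
+        This instantiates the appropriate class for each entry (Reactor, Core,
+        Component, or other ArmiObject) and attaches a grid wherever a grid index was
+        recorded. Parameter data and parent/child relationships are applied separately
+        as part of the database load.
+
+        Returns
+        -------
+        comps : list of tuple
+            ``(comp, serialNum, numChildren, location)`` for each object, in layout
+            order.
+        groupedComps : dict
+            The constructed objects, grouped by type name.
+        """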
+ comps = [] + groupedComps = collections.defaultdict(list) + + for ( + compType, + name, + serialNum, + numChildren, + location, + material, + temperatures, + gridIndex, + ) in zip( + self.type, + self.name, + self.serialNum, + self.numChildren, + self.location, + self.material, + self.temperatures, + self.gridIndex, + ): + Klass = ArmiObject.TYPES[compType] + + if issubclass(Klass, Reactor): + comp = Klass(caseTitle, bp) + elif issubclass(Klass, Core): + comp = Klass(name) + elif issubclass(Klass, Component): + # XXX: initialize all dimensions to 0, they will be loaded and assigned + # after load + kwargs = dict.fromkeys(Klass.DIMENSION_NAMES, 0) + kwargs["material"] = material + kwargs["name"] = name + kwargs["Tinput"] = temperatures[0] + kwargs["Thot"] = temperatures[1] + comp = Klass(**kwargs) + else: + comp = Klass(name) + + if gridIndex is not None: + gridParams = self.gridParams[gridIndex] + comp.spatialGrid = self.gridClasses[gridParams[0]]( + *gridParams[1], armiObject=comp + ) + + comps.append((comp, serialNum, numChildren, location)) + groupedComps[compType].append(comp) + + return comps, groupedComps + + def writeToDB(self, h5group): + if "layout/type" in h5group: + # It looks like we have already written the layout to DB, skip for now + return + try: + h5group.create_dataset( + "layout/type", + data=numpy.array(self.type).astype("S"), + compression="gzip", + ) + h5group.create_dataset( + "layout/name", + data=numpy.array(self.name).astype("S"), + compression="gzip", + ) + h5group.create_dataset( + "layout/serialNum", data=self.serialNum, compression="gzip" + ) + h5group.create_dataset( + "layout/indexInData", data=self.indexInData, compression="gzip" + ) + h5group.create_dataset( + "layout/numChildren", data=self.numChildren, compression="gzip" + ) + h5group.create_dataset( + "layout/location", data=self.location, compression="gzip" + ) + h5group.create_dataset( + "layout/locationType", + data=numpy.array(self.locationType).astype("S"), + compression="gzip", + ) + h5group.create_dataset( + "layout/material", + data=numpy.array(self.material).astype("S"), + compression="gzip", + ) + h5group.create_dataset( + "layout/temperatures", data=self.temperatures, compression="gzip" + ) + + h5group.create_dataset( + "layout/gridIndex", + data=replaceNonesWithNonsense( + numpy.array(self.gridIndex), "layout/gridIndex" + ), + compression="gzip", + ) + + gridsGroup = h5group.create_group("layout/grids") + gridsGroup.attrs["nGrids"] = len(self.gridParams) + gridsGroup.create_dataset( + "type", data=numpy.array([gp[0] for gp in self.gridParams]).astype("S") + ) + + for igrid, gridParams in enumerate(gp[1] for gp in self.gridParams): + thisGroup = gridsGroup.create_group(str(igrid)) + thisGroup.create_dataset("unitSteps", data=gridParams.unitSteps) + + for ibound, bound in enumerate(gridParams.bounds): + if bound is not None: + bound = numpy.array(bound) + thisGroup.create_dataset("bounds_{}".format(ibound), data=bound) + + thisGroup.create_dataset( + "unitStepLimits", data=gridParams.unitStepLimits + ) + + offset = gridParams.offset + thisGroup.attrs["offset"] = offset is not None + if offset is not None: + thisGroup.create_dataset("offset", data=offset) + thisGroup.create_dataset("geomType", data=gridParams.geomType) + thisGroup.create_dataset("symmetry", data=gridParams.symmetry) + except RuntimeError: + runLog.error("Failed to create datasets in: {}".format(h5group)) + raise + + @staticmethod + def computeAncestors(serialNum, numChildren, depth=1) -> List[Optional[int]]: + """ + Return a 
list containing the serial number of the parent corresponding to each
+        object at the given depth.
+
+        Depth in this case means how many layers to reach up to find the desired
+        ancestor. A depth of 1 will yield the direct parent of each element, a depth
+        of 2 would yield the element's parent's parent, and so on.
+
+        The zero-th element will always be None, as the first object is the root
+        element and so has no parent. Subsequent depths will result in more Nones.
+
+        This function is useful for forming a lightweight sense of how the database
+        contents stitch together, without having to go to the trouble of fully
+        unpacking the Reactor model.
+
+        Parameters
+        ----------
+        serialNum : List of int
+            List of serial numbers for each object/element, as laid out in Layout
+        numChildren : List of int
+            List of numbers of children for each object/element, as laid out in Layout
+
+        Notes
+        -----
+        This is not using a recursive approach for a couple of reasons. First, the
+        iterative form isn't so bad; we just need two stacks. Second, the interface of
+        the recursive function would be pretty unwieldy. We are progressively
+        consuming two lists, which we would need to keep passing down with an
+        index/cursor, or progressively slice them as we go, which would be pretty
+        inefficient.
+        """
+        ancestors: List[Optional[int]] = [None]
+
+        snStack = [serialNum[0]]
+        ncStack = [numChildren[0]]
+
+        for sn, nc in zip(serialNum[1:], numChildren[1:]):
+            ncStack[-1] -= 1
+            if nc > 0:
+                ancestors.append(snStack[-1])
+                snStack.append(sn)
+                ncStack.append(nc)
+            else:
+                ancestors.append(snStack[-1])
+
+                while ncStack and ncStack[-1] == 0:
+                    snStack.pop()
+                    ncStack.pop()
+
+        if depth > 1:
+            # handle deeper scenarios. This is a bit tricky. Store the original
+            # ancestors for the first generation, since that ultimately contains all of
+            # the information that we need. Then in a loop, keep hopping one more layer
+            # of indirection, and indexing into the corresponding location in the
+            # original ancestor array.
+            indexMap = {sn: i for i, sn in enumerate(serialNum)}
+            origAncestors = ancestors
+            for _ in range(depth - 1):
+                ancestors = [
+                    origAncestors[indexMap[ia]] if ia is not None else None
+                    for ia in ancestors
+                ]
+
+        return ancestors
+
+    @staticmethod
+    def allSubclasses(cls) -> set:
+        """Find all subclasses of the given class, in any namespace."""
+        return set(cls.__subclasses__()).union(
+            [s for c in cls.__subclasses__() for s in Layout.allSubclasses(c)]
+        )
+
+
+def _packLocations(
+    locations: List[grids.LocationBase], minorVersion: int = DB_MINOR
+) -> Tuple[List[str], List[Tuple[int, int, int]]]:
+    """
+    Extract information from a location needed to write it to this DB.
+
+    Each locator has one locationType and up to N location-defining datums,
+    where N is the number of entries in a possible multiindex, or just 1
+    for everything else.
+
+    Shrink grid locator names for storage efficiency.
+
+    Notes
+    -----
+    Contains some conditionals to still load databases made before
+    db version 3.3, which can be removed once no users care about
+    those DBs anymore.
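+
+    For example (an illustrative sketch mirroring ``test_layout.test_locationPacking``),
+    packing an ``IndexLocation``, a ``CoordinateLocation``, and a two-entry
+    ``MultiIndexLocation`` under the current minor version gives::
+
+        locTypes, locData = _packLocations([loc1, loc2, loc3])
+        # locTypes == ["I", "C", "M:2"]
+        # locData  == [(1, 2, 3), (4.0, 5.0, 6.0), (7, 8, 9), (10, 11, 12)]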
+ """ + if minorVersion <= 2: + locationTypes, locationData = _packLocationsV1(locations) + elif minorVersion == 3: + locationTypes, locationData = _packLocationsV2(locations) + elif minorVersion > 3: + locationTypes, locationData = _packLocationsV3(locations) + else: + raise ValueError("Unsupported minor version: {}".format(minorVersion)) + return locationTypes, locationData + + +def _packLocationsV1( + locations: List[grids.LocationBase], +) -> Tuple[List[str], List[Tuple[int, int, int]]]: + """Delete when reading v <=3.2 DB's no longer wanted.""" + locTypes = [] + locData: List[Tuple[int, int, int]] = [] + for loc in locations: + locationType = loc.__class__.__name__ + if loc is None: + locationType = "None" + locDatum = [(0.0, 0.0, 0.0)] + elif isinstance(loc, grids.IndexLocation): + locDatum = [loc.indices] + else: + raise ValueError(f"Invalid location type: {loc}") + + locTypes.append(locationType) + locData.extend(locDatum) + + return locTypes, locData + + +def _packLocationsV2( + locations: List[grids.LocationBase], +) -> Tuple[List[str], List[Tuple[int, int, int]]]: + """ + Location packing implementation for minor version 3. See release notes above. + """ + locTypes = [] + locData: List[Tuple[int, int, int]] = [] + for loc in locations: + locationType = LOCATION_TYPE_LABELS[type(loc)] + if loc is None: + locDatum = [(0.0, 0.0, 0.0)] + elif loc.__class__ is grids.CoordinateLocation: + locDatum = [loc.indices] + elif loc.__class__ is grids.IndexLocation: + locDatum = [loc.indices] + elif loc.__class__ is grids.MultiIndexLocation: + # encode number of sub-locations to allow in-line unpacking. + locationType += f"{len(loc)}" + locDatum = [subloc.indices for subloc in loc] + else: + raise ValueError(f"Invalid location type: {loc}") + + locTypes.append(locationType) + locData.extend(locDatum) + + return locTypes, locData + + +def _packLocationsV3( + locations: List[grids.LocationBase], +) -> Tuple[List[str], List[Tuple[int, int, int]]]: + """ + Location packing implementation for minor version 4. See release notes above. + """ + locTypes = [] + locData: List[Tuple[int, int, int]] = [] + + for loc in locations: + locationType = LOCATION_TYPE_LABELS[type(loc)] + if loc is None: + locDatum = [(0.0, 0.0, 0.0)] + elif type(loc) is grids.IndexLocation: + locDatum = [loc.getCompleteIndices()] + elif type(loc) is grids.CoordinateLocation: + # CoordinateLocations do not implement getCompleteIndices properly, and we + # do not really have a motivation to store them as we do with index + # locations. + locDatum = [loc.indices] + elif type(loc) is grids.MultiIndexLocation: + locationType += f"{len(loc)}" + locDatum = [subloc.indices for subloc in loc] + else: + raise ValueError(f"Invalid location type: {loc}") + + locTypes.append(locationType) + locData.extend(locDatum) + + return locTypes, locData + + +def _unpackLocations(locationTypes, locData, minorVersion: int = DB_MINOR): + """ + Convert location data as read from DB back into data structure for building reactor model. + + location and locationType will only have different lengths when multiindex locations + are used. 
+ """ + if minorVersion < 3: + return _unpackLocationsV1(locationTypes, locData) + else: + return _unpackLocationsV2(locationTypes, locData) + + +def _unpackLocationsV1(locationTypes, locData): + """Delete when reading v <=3.2 DB's no longer wanted.""" + locsIter = iter(locData) + unpackedLocs = [] + for lt in locationTypes: + if lt == "None": + loc = next(locsIter) + unpackedLocs.append(None) + elif lt == "IndexLocation": + loc = next(locsIter) + # the data is stored as float, so cast back to int + unpackedLocs.append(tuple(int(i) for i in loc)) + else: + loc = next(locsIter) + unpackedLocs.append(tuple(loc)) + return unpackedLocs + + +def _unpackLocationsV2(locationTypes, locData): + """ + Location unpacking implementation for minor version 3+. See release notes above. + """ + locsIter = iter(locData) + unpackedLocs = [] + for lt in locationTypes: + if lt == LOC_NONE: + loc = next(locsIter) + unpackedLocs.append(None) + elif lt == LOC_INDEX: + loc = next(locsIter) + # the data is stored as float, so cast back to int + unpackedLocs.append(tuple(int(i) for i in loc)) + elif lt == LOC_COORD: + loc = next(locsIter) + unpackedLocs.append(tuple(loc)) + elif lt.startswith(LOC_MULTI): + # extract number of sublocations from e.g. "M:345" string. + numSubLocs = int(lt.split(":")[1]) + multiLocs = [] + for _ in range(numSubLocs): + subLoc = next(locsIter) + # All multiindexes sublocs are index locs + multiLocs.append(tuple(int(i) for i in subLoc)) + unpackedLocs.append(multiLocs) + else: + raise ValueError(f"Read unknown location type {lt}. Invalid DB.") + + return unpackedLocs + + +def replaceNonesWithNonsense( + data: numpy.ndarray, paramName: str, nones: numpy.ndarray = None +) -> numpy.ndarray: + """ + Replace instances of ``None`` with nonsense values that can be detected/recovered + when reading. + + Parameters + ---------- + data + The numpy array containing ``None`` values that need to be replaced. + + paramName + The name of the parameter who's data we are treating. Only used for diagnostics. + + nones + An array containing the index locations on the ``None`` elements. It is a little + strange to pass these, in but we find these indices to determine whether we need + to call this function in the first place, so might as well pass it in, so that + we don't need to perform the operation again. + + Notes + ----- + This only supports situations where the data is a straight-up ``None``, or a valid, + database-storable numpy array (or easily convertable to one (e.g. tuples/lists with + numerical values)). This does not support, for instance, a numpy ndarray with some + Nones in it. + + For example, the following is supported:: + + [[1, 2, 3], None, [7, 8, 9]] + + However, the following is not:: + + [[1, 2, 3], [4, None, 6], [7, 8, 9]] + + See Also + -------- + replaceNonsenseWithNones + Reverses this operation. + """ + if nones is None: + nones = numpy.where([d is None for d in data])[0] + + try: + # loop to find what the default value should be. This is the first non-None + # value that we can find. 
+ defaultValue = None + realType = None + val = None + + for val in data: + if isinstance(val, numpy.ndarray): + # if multi-dimensional, val[0] could still be an array, val.flat is + # a flattened iterator, so next(val.flat) gives the first value in + # an n-dimensional array + realType = type(next(val.flat)) + + if realType is type(None): + continue + + defaultValue = numpy.reshape( + numpy.repeat(NONE_MAP[realType], val.size), val.shape + ) + break + else: + realType = type(val) + + if realType is type(None): + continue + + defaultValue = NONE_MAP[realType] + break + else: + # Couldn't find any non-None entries, so it really doesn't matter what type we + # use. Using float, because NaN is nice. + realType = float + defaultValue = NONE_MAP[realType] + + if isinstance(val, numpy.ndarray): + data = numpy.array([d if d is not None else defaultValue for d in data]) + else: + data[nones] = defaultValue + + except Exception as ee: + runLog.error( + "Error while attempting to determine default for {}.\nvalue: {}\nError: {}".format( + paramName, val, ee + ) + ) + raise TypeError( + "Could not determine None replacement for {} with type {}, val {}, default {}".format( + paramName, realType, val, defaultValue + ) + ) + + try: + data = data.astype(realType) + except: + raise ValueError( + "Could not coerce data for {} to {}, data:\n{}".format( + paramName, realType, data + ) + ) + + if data.dtype.kind == "O": + raise TypeError( + "Failed to convert data to valid HDF5 type {}, data:{}".format( + paramName, data + ) + ) + + return data + + +def replaceNonsenseWithNones(data: numpy.ndarray, paramName: str) -> numpy.ndarray: + """ + Replace special nonsense values with ``None``. + + This essentially reverses the operations performed by + :py:func:`replaceNonesWithNonsense`. + + Parameters + ---------- + data + The array from the database that contains special ``None`` nonsense values. + + paramName + The param name who's data we are dealing with. Only used for diagnostics. + + See Also + -------- + replaceNonesWithNonsense + """ + # NOTE: This is closely-related to the NONE_MAP. + if numpy.issubdtype(data.dtype, numpy.floating): + isNone = numpy.isnan(data) + elif numpy.issubdtype(data.dtype, numpy.integer): + isNone = data == numpy.iinfo(data.dtype).min + 2 + elif numpy.issubdtype(data.dtype, numpy.str_): + isNone = data == "" + else: + raise TypeError( + "Unable to resolve values that should be None for `{}`".format(paramName) + ) + + if data.ndim > 1: + result = numpy.ndarray(data.shape[0], dtype=numpy.dtype("O")) + for i in range(data.shape[0]): + if isNone[i].all(): + result[i] = None + elif isNone[i].any(): + # TODO: This is not symmetric with the replaceNonesWithNonsense impl. + # That one assumes that Nones apply only at the highest dimension, and + # that the lower dimensions will be filled with the magic None value. + # Non-none entries below the top level fail to coerce to a serializable + # numpy array and would raise an exception when trying to write. TL;DR: + # this is a dead branch until the replaceNonesWithNonsense impl is more + # sophisticated. 
+ result[i] = numpy.array(data[i], dtype=numpy.dtype("O")) + result[i][isNone[i]] = None + else: + result[i] = data[i] + + else: + result = numpy.ndarray(data.shape, dtype=numpy.dtype("O")) + result[:] = data + result[isNone] = None + + return result diff --git a/armi/bookkeeping/db/tests/test_comparedb3.py b/armi/bookkeeping/db/tests/test_comparedb3.py index a149b60df..157d7fd19 100644 --- a/armi/bookkeeping/db/tests/test_comparedb3.py +++ b/armi/bookkeeping/db/tests/test_comparedb3.py @@ -19,7 +19,6 @@ import h5py import numpy as np -from armi.bookkeeping.db import database3 from armi.bookkeeping.db.compareDB3 import ( _compareAuxData, _diffSimpleData, @@ -28,6 +27,7 @@ DiffResults, OutputWriter, ) +from armi.bookkeeping.db.databaseInterface import DatabaseInterface from armi.reactor.tests import test_reactors from armi.tests import mockRunLogs, TEST_ROOT from armi.utils.directoryChangers import TemporaryDirectoryChanger @@ -94,7 +94,7 @@ def test_compareDatabaseDuplicate(self): dbs = [] for i in range(2): # create the tests DB - dbi = database3.DatabaseInterface(r, o.cs) + dbi = DatabaseInterface(r, o.cs) dbi.initDB(fName=self._testMethodName + str(i) + ".h5") db = dbi.database @@ -138,7 +138,7 @@ def test_compareDatabaseSim(self): ) # create the tests DB - dbi = database3.DatabaseInterface(r, cs) + dbi = DatabaseInterface(r, cs) dbi.initDB(fName=self._testMethodName + str(nCycles) + ".h5") db = dbi.database diff --git a/armi/bookkeeping/db/tests/test_database3.py b/armi/bookkeeping/db/tests/test_database3.py index 95557756f..185969bcf 100644 --- a/armi/bookkeeping/db/tests/test_database3.py +++ b/armi/bookkeeping/db/tests/test_database3.py @@ -12,19 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-r""" Tests for the Database3 class -""" +r""" Tests for the Database3 class""" # pylint: disable=missing-function-docstring,missing-class-docstring,abstract-method,protected-access,no-member,disallowed-name,invalid-name -import os import subprocess import unittest import h5py import numpy -from armi import context -from armi.bookkeeping.db import _getH5File, database3 -from armi.reactor import grids +from armi.bookkeeping.db import _getH5File +from armi.bookkeeping.db import database3 +from armi.bookkeeping.db.databaseInterface import DatabaseInterface from armi.reactor import parameters from armi.reactor.tests import test_reactors from armi.tests import TEST_ROOT @@ -42,7 +40,7 @@ def setUp(self): TEST_ROOT, customSettings={"reloadDBName": "reloadingDB.h5"} ) - self.dbi = database3.DatabaseInterface(self.r, self.o.cs) + self.dbi = DatabaseInterface(self.r, self.o.cs) self.dbi.initDB(fName=self._testMethodName + ".h5") self.db: database3.Database3 = self.dbi.database self.stateRetainer = self.r.retainState().__enter__() @@ -206,7 +204,7 @@ def test_prepRestartRun(self): ) # create a db based on the cs - dbi = database3.DatabaseInterface(r, cs) + dbi = DatabaseInterface(r, cs) dbi.initDB(fName="reloadingDB.h5") db = dbi.database @@ -242,7 +240,7 @@ def test_prepRestartRun(self): ) # create a db based on the cs - dbi = database3.DatabaseInterface(r, cs) + dbi = DatabaseInterface(r, cs) dbi.initDB(fName="reloadingDB.h5") db = dbi.database @@ -447,7 +445,7 @@ def test_mergeHistory(self): self.r.p.cycle = 1 self.r.p.timeNode = 0 tnGroup = self.db.getH5Group(self.r) - database3._writeAttrs( + database3.Database3._writeAttrs( tnGroup["layout/serialNum"], tnGroup, { @@ -470,8 +468,10 @@ def test_mergeHistory(self): "@/c01n00/attrs/0_fakeBigData", ) - # actually exercise the _resolveAttrs function - attrs = database3._resolveAttrs(tnGroup["layout/serialNum"].attrs, tnGroup) + # exercise the _resolveAttrs function + attrs = database3.Database3._resolveAttrs( + tnGroup["layout/serialNum"].attrs, tnGroup + ) self.assertTrue(numpy.array_equal(attrs["fakeBigData"], numpy.eye(6400))) keys = sorted(db2.keys()) @@ -619,101 +619,5 @@ def test_loadBlueprints(self): self.assertEqual(len(bp.assemblies), 0) -class TestLocationPacking(unittest.TestCase): - r"""Tests for database location""" - - def setUp(self): - self.td = TemporaryDirectoryChanger() - self.td.__enter__() - - def tearDown(self): - self.td.__exit__(None, None, None) - - def test_locationPacking(self): - # pylint: disable=protected-access - loc1 = grids.IndexLocation(1, 2, 3, None) - loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) - loc3 = grids.MultiIndexLocation(None) - loc3.append(grids.IndexLocation(7, 8, 9, None)) - loc3.append(grids.IndexLocation(10, 11, 12, None)) - - locs = [loc1, loc2, loc3] - tp, data = database3._packLocations(locs) - - self.assertEqual(tp[0], database3.LOC_INDEX) - self.assertEqual(tp[1], database3.LOC_COORD) - self.assertEqual(tp[2], database3.LOC_MULTI + "2") - - unpackedData = database3._unpackLocations(tp, data) - - self.assertEqual(unpackedData[0], (1, 2, 3)) - self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) - self.assertEqual(unpackedData[2], [(7, 8, 9), (10, 11, 12)]) - - def test_locationPackingOlderVersions(self): - # pylint: disable=protected-access - for version in [1, 2]: - loc1 = grids.IndexLocation(1, 2, 3, None) - loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) - loc3 = grids.MultiIndexLocation(None) - loc3.append(grids.IndexLocation(7, 8, 9, None)) - loc3.append(grids.IndexLocation(10, 11, 
12, None)) - - locs = [loc1, loc2, loc3] - tp, data = database3._packLocations(locs, minorVersion=version) - - self.assertEqual(tp[0], "IndexLocation") - self.assertEqual(tp[1], "CoordinateLocation") - self.assertEqual(tp[2], "MultiIndexLocation") - - unpackedData = database3._unpackLocations(tp, data, minorVersion=version) - - self.assertEqual(unpackedData[0], (1, 2, 3)) - self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) - self.assertEqual(unpackedData[2][0].tolist(), [7, 8, 9]) - self.assertEqual(unpackedData[2][1].tolist(), [10, 11, 12]) - - def test_locationPackingOldVersion(self): - # pylint: disable=protected-access - version = 3 - - loc1 = grids.IndexLocation(1, 2, 3, None) - loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) - loc3 = grids.MultiIndexLocation(None) - loc3.append(grids.IndexLocation(7, 8, 9, None)) - loc3.append(grids.IndexLocation(10, 11, 12, None)) - - locs = [loc1, loc2, loc3] - tp, data = database3._packLocations(locs, minorVersion=version) - - self.assertEqual(tp[0], "I") - self.assertEqual(tp[1], "C") - self.assertEqual(tp[2], "M:2") - - unpackedData = database3._unpackLocations(tp, data, minorVersion=version) - - self.assertEqual(unpackedData[0], (1, 2, 3)) - self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) - self.assertEqual(unpackedData[2][0], (7, 8, 9)) - self.assertEqual(unpackedData[2][1], (10, 11, 12)) - - def test_close(self): - intendedFileName = "xyz.h5" - - db = database3.Database3(intendedFileName, "w") - self.assertEqual(db._fileName, intendedFileName) - self.assertIsNone(db._fullPath) # this isn't set until the db is opened - - db.open() - self.assertEqual( - db._fullPath, os.path.join(context.getFastPath(), intendedFileName) - ) - - db.close() # this should move the file out of the FAST_PATH - self.assertEqual( - db._fullPath, os.path.join(os.path.abspath("."), intendedFileName) - ) - - if __name__ == "__main__": unittest.main() diff --git a/armi/bookkeeping/tests/test_databaseInterface.py b/armi/bookkeeping/db/tests/test_databaseInterface.py similarity index 99% rename from armi/bookkeeping/tests/test_databaseInterface.py rename to armi/bookkeeping/db/tests/test_databaseInterface.py index 1dced71e6..877eadbaf 100644 --- a/armi/bookkeeping/tests/test_databaseInterface.py +++ b/armi/bookkeeping/db/tests/test_databaseInterface.py @@ -26,7 +26,8 @@ from armi import interfaces from armi import runLog from armi import settings -from armi.bookkeeping.db.database3 import DatabaseInterface, Database3 +from armi.bookkeeping.db.database3 import Database3 +from armi.bookkeeping.db.databaseInterface import DatabaseInterface from armi.cases import case from armi.reactor import grids from armi.reactor.flags import Flags @@ -440,6 +441,7 @@ def _getOperatorThatChangesVariables(self, cs): mock = MockInterface(o.r, o.cs, None) + # pylint: disable=unused-argument def interactEveryNode(self, cycle, node): # Could use just += 1 but this will show more errors since it is less # suseptable to cancelation of errors off by one. diff --git a/armi/bookkeeping/db/tests/test_layout.py b/armi/bookkeeping/db/tests/test_layout.py new file mode 100644 index 000000000..526fe8e99 --- /dev/null +++ b/armi/bookkeeping/db/tests/test_layout.py @@ -0,0 +1,120 @@ +# Copyright 2022 TerraPower, LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +r""" Tests for the db Layout and associated tools""" +# pylint: disable=missing-function-docstring,missing-class-docstring,abstract-method,protected-access,no-member,disallowed-name,invalid-name +import os +import unittest + +from armi import context +from armi.bookkeeping.db import database3 +from armi.bookkeeping.db import layout +from armi.reactor import grids +from armi.utils.directoryChangers import TemporaryDirectoryChanger + + +class TestLocationPacking(unittest.TestCase): + r"""Tests for database location""" + + def setUp(self): + self.td = TemporaryDirectoryChanger() + self.td.__enter__() + + def tearDown(self): + self.td.__exit__(None, None, None) + + def test_locationPacking(self): + # pylint: disable=protected-access + loc1 = grids.IndexLocation(1, 2, 3, None) + loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) + loc3 = grids.MultiIndexLocation(None) + loc3.append(grids.IndexLocation(7, 8, 9, None)) + loc3.append(grids.IndexLocation(10, 11, 12, None)) + + locs = [loc1, loc2, loc3] + tp, data = layout._packLocations(locs) + + self.assertEqual(tp[0], layout.LOC_INDEX) + self.assertEqual(tp[1], layout.LOC_COORD) + self.assertEqual(tp[2], layout.LOC_MULTI + "2") + + unpackedData = layout._unpackLocations(tp, data) + + self.assertEqual(unpackedData[0], (1, 2, 3)) + self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) + self.assertEqual(unpackedData[2], [(7, 8, 9), (10, 11, 12)]) + + def test_locationPackingOlderVersions(self): + # pylint: disable=protected-access + for version in [1, 2]: + loc1 = grids.IndexLocation(1, 2, 3, None) + loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) + loc3 = grids.MultiIndexLocation(None) + loc3.append(grids.IndexLocation(7, 8, 9, None)) + loc3.append(grids.IndexLocation(10, 11, 12, None)) + + locs = [loc1, loc2, loc3] + tp, data = layout._packLocations(locs, minorVersion=version) + + self.assertEqual(tp[0], "IndexLocation") + self.assertEqual(tp[1], "CoordinateLocation") + self.assertEqual(tp[2], "MultiIndexLocation") + + unpackedData = layout._unpackLocations(tp, data, minorVersion=version) + + self.assertEqual(unpackedData[0], (1, 2, 3)) + self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) + self.assertEqual(unpackedData[2][0].tolist(), [7, 8, 9]) + self.assertEqual(unpackedData[2][1].tolist(), [10, 11, 12]) + + def test_locationPackingOldVersion(self): + # pylint: disable=protected-access + version = 3 + + loc1 = grids.IndexLocation(1, 2, 3, None) + loc2 = grids.CoordinateLocation(4.0, 5.0, 6.0, None) + loc3 = grids.MultiIndexLocation(None) + loc3.append(grids.IndexLocation(7, 8, 9, None)) + loc3.append(grids.IndexLocation(10, 11, 12, None)) + + locs = [loc1, loc2, loc3] + tp, data = layout._packLocations(locs, minorVersion=version) + + self.assertEqual(tp[0], "I") + self.assertEqual(tp[1], "C") + self.assertEqual(tp[2], "M:2") + + unpackedData = layout._unpackLocations(tp, data, minorVersion=version) + + self.assertEqual(unpackedData[0], (1, 2, 3)) + self.assertEqual(unpackedData[1], (4.0, 5.0, 6.0)) + self.assertEqual(unpackedData[2][0], (7, 8, 9)) + self.assertEqual(unpackedData[2][1], (10, 11, 12)) + + def test_close(self): + 
intendedFileName = "xyz.h5" + + db = database3.Database3(intendedFileName, "w") + self.assertEqual(db._fileName, intendedFileName) + self.assertIsNone(db._fullPath) # this isn't set until the db is opened + + db.open() + self.assertEqual( + db._fullPath, os.path.join(context.getFastPath(), intendedFileName) + ) + + db.close() # this should move the file out of the FAST_PATH + self.assertEqual( + db._fullPath, os.path.join(os.path.abspath("."), intendedFileName) + ) diff --git a/armi/nuclearDataIO/cccc/tests/test_pmatrx.py b/armi/nuclearDataIO/cccc/tests/test_pmatrx.py index d15139713..835aa726e 100644 --- a/armi/nuclearDataIO/cccc/tests/test_pmatrx.py +++ b/armi/nuclearDataIO/cccc/tests/test_pmatrx.py @@ -220,7 +220,7 @@ class TestProductionMatrix_FromWrittenAscii(TestPmatrx): """ Tests that show you can read and write pmatrx files from ascii libraries. - NOTES + Notes ----- This runs all the tests from TestPmatrx. """ diff --git a/doc/release/0.2.rst b/doc/release/0.2.rst index 8e6ab8741..331e589b6 100644 --- a/doc/release/0.2.rst +++ b/doc/release/0.2.rst @@ -8,6 +8,7 @@ Release Date: TBD What's new in ARMI ------------------ +#. Split 3 classes in ``da5abase3.py`` into 3 files (`PR#955 `_) #. TBD Bug fixes