diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index 604506e8..5963b370 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -26,7 +26,7 @@ jobs: - name: Install fmu-ensemble and test dependencies run: | pip install pip -U - pip install .[tests] + pip install ".[test]" - name: Generate coverage report run: | diff --git a/.github/workflows/fmu-ensemble.yml b/.github/workflows/fmu-ensemble.yml index 768d74e9..cec954d1 100644 --- a/.github/workflows/fmu-ensemble.yml +++ b/.github/workflows/fmu-ensemble.yml @@ -41,19 +41,14 @@ jobs: run: git fetch --unshallow --tags - name: ๐Ÿ Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: ๐Ÿ“ฆ Install fmu-ensemble with dependencies - run: | - pip install --upgrade pip - pip install . - - - name: ๐Ÿ“ฆ Install test dependencies + - name: ๐Ÿ“ฆ Install fmu-ensemble with test dependencies run: | pip install res2df - pip install .[tests,docs] + pip install ".[test, docs]" pip install "${{ matrix.numpy-version }}" - name: Install ecl2df @@ -64,17 +59,11 @@ jobs: - name: ๐Ÿงพ List all installed packages run: pip freeze - - name: ๐Ÿ•ต๏ธ Check code style - env: - SKIP: no-commit-to-branch - run: | - pre-commit run --all-files - - name: ๐Ÿค– Run tests run: | python -c "import fmu.ensemble" pytest tests/ - pip install .[parquet] + pip install ".[parquet]" pytest tests/test_virtualensemble.py - name: Syntax check on RST documentation diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml new file mode 100644 index 00000000..978df765 --- /dev/null +++ b/.github/workflows/style.yml @@ -0,0 +1,43 @@ +name: style + +on: + push: + branches: + - master + pull_request: + branches: + - master + release: + types: + - published + +jobs: + fmu-ensemble: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.12"] + + steps: + - name: ๐Ÿ“– Checkout commit locally + uses: actions/checkout@v4 + + - name: ๐Ÿ“– Checkout tags + # This seems necessary for setuptools_scm to be able to infer + # the correct version. + run: git fetch --unshallow --tags + + - name: ๐Ÿ Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: ๐Ÿ“ฆ Install fmu-ensemble with style dependencies + run: | + pip install ".[style]" + + - name: ๐Ÿ•ต๏ธ Check code style + env: + SKIP: no-commit-to-branch + run: | + pre-commit run --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b7251a5..d9af02c7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ repos: rev: v0.6.4 hooks: - id: ruff - args: [ --extend-select, I, --fix ] + args: [ --fix ] - id: ruff-format exclude: "tests/data/testensemble-reek001" diff --git a/.projectile b/.projectile deleted file mode 100644 index e69de29b..00000000 diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 3ea3e8ca..00000000 --- a/.pylintrc +++ /dev/null @@ -1,32 +0,0 @@ -# PYLINT: General settings for FMU modules - -[GENERAL] -disable=R0205, F0010, C0330, E1136, E0401 -output-format=colorized - -# E1136: Pylint is not able to detect that all objects really are subscriptable -# E0401: import-error, these will be caught by automated tests anyhow - -[MASTER] -init-hook='import sys; sys.path.append("src/")' -ignore=version.py,__init__.py,setup.py,jobs.py - -[BASIC] -good-names=logger, fmux, xfmu - -[FORMAT] -max-line-length=88 -max-module-lines=3000 - -[DESIGN] -max-attributes=25 -max-args=20 -max-locals=30 -max-statements=100 -max-branches=20 - -[SIMILARITIES] -min-similarity-lines=6 -ignore-comments=yes -ignore-docstrings=yes -ignore-imports=yes diff --git a/bandit.yml b/bandit.yml deleted file mode 100644 index 75d550c3..00000000 --- a/bandit.yml +++ /dev/null @@ -1 +0,0 @@ -skips: ['B101'] diff --git a/ci/testkomodo.sh b/ci/testkomodo.sh index 88e844af..d728bd3a 100644 --- a/ci/testkomodo.sh +++ b/ci/testkomodo.sh @@ -1,4 +1,3 @@ install_test_dependencies () { - pip install -r test_requirements.txt - pip install -r docs_requirements.txt + pip install ".[test, docs]" } diff --git a/docs_requirements.txt b/docs_requirements.txt deleted file mode 100644 index cb2d22d8..00000000 --- a/docs_requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -ipython -rstcheck -sphinx -sphinx-argparse -sphinx_rtd_theme diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..91952c2d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,109 @@ +[build-system] +requires = ["setuptools>65", "setuptools_scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "fmu-ensemble" +authors = [ + {name = "Hรฅvard Berland", email = "havb@equinor.com"}, +] +description = "Python API to ensembles produced by ERT" +requires-python = ">= 3.8" +readme = "README.rst" +license = {text = "GPL-3.0"} +keywords = ["fmu", "ensemble"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Natural Language :: English", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dynamic = ["version"] +dependencies = [ + "resdata >= 4.0.0", + "numpy", + "pandas", + "pyyaml >= 5.1", +] + +[project.urls] +Repository = "https://github.com/equinor/fmu-ensemble" + +[project.optional-dependencies] +test = [ + "pytest>=2.9.2", + "pytest-cov", +] +docs = [ + "ipython", + "rstcheck", + "sphinx", + "sphinx-argparse", + "sphinx_rtd_theme", +] +style = [ + "pre-commit", +] +parquet = [ + "pyarrow", +] + +[tool.setuptools] +package-dir = {"" = "src"} +include-package-data = true + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools_scm] +write_to = "src/fmu/ensemble/version.py" + +[tool.rstcheck] +ignore_directives = ["argparse", "automodule"] +# This looks like a bug in rstcheck: +ignore_messages = "Hyperlink target .* is not referenced" + +[tool.ruff] +src = ["src"] +line-length = 88 + +[tool.ruff.lint] +select = [ + "W", # pycodestyle + "I", # isort + "B", # flake-8-bugbear + "SIM", # flake-8-simplify + "F", # pyflakes + "PL", # pylint + "NPY", # numpy specific rules + "C4", # flake8-comprehensions +] +ignore = ["PLW2901", # redefined-loop-name + "PLR2004", # magic-value-comparison + "PLR0915", # too-many-statements + "PLR0912", # too-many-branches + "PLR0911", # too-many-return-statements + "PLC2701", # import-private-name + "PLR6201", # literal-membership + "PLR0914", # too-many-locals + "PLR6301", # no-self-use + "PLW1641", # eq-without-hash + "PLR0904", # too-many-public-methods + "PLR1702", # too-many-nested-blocks + "PLW3201", # bad-dunder-method-name + "B028", # no-explicit-stacklevel + "SIM118", # in-dict-keys +] + +[tool.ruff.lint.extend-per-file-ignores] +"tests/*" = [ +"PLW0603" # global-statement +] + +[tool.ruff.lint.pylint] +max-args = 20 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index cb785221..00000000 --- a/setup.cfg +++ /dev/null @@ -1,22 +0,0 @@ -[bdist_wheel] -universal = 1 - -[flake8] -max-line-length = 88 -exclude = docs, - tests/data - -[aliases] -test = pytest - -[tool:pytest] -addopts = --verbose - -[build_sphinx] -all-files = 1 -warning-is-error = 1 - -[rstcheck] -ignore_directives=argparse,automodule -# This looks like a bug in rstcheck: -ignore_messages=Hyperlink target .* is not referenced diff --git a/setup.py b/setup.py deleted file mode 100644 index 1ec2c85f..00000000 --- a/setup.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python - -"""The setup script.""" - -from setuptools import find_packages, setup - -with open("README.rst") as readme_file: - readme = readme_file.read() - -with open("HISTORY.rst", "rb") as history_file: - # Norwegian characters in HISTORY.rst - history = history_file.read().decode("UTF-8") - -REQUIREMENTS = [ - "resdata>=4.0.0", - "numpy", - "pandas", - "pyyaml>=5.1", -] - -SETUP_REQUIREMENTS = ["setuptools>=65", "setuptools_scm"] - -with open("test_requirements.txt") as f: - test_requirements = f.read().splitlines() -with open("docs_requirements.txt") as f: - docs_requirements = f.read().splitlines() - -EXTRAS_REQUIRE = { - "tests": test_requirements, - "docs": docs_requirements, - "parquet": ["pyarrow"], -} - -setup( - name="fmu-ensemble", - use_scm_version={"write_to": "src/fmu/ensemble/version.py"}, - description="Python API to ensembles produced by ERT", - long_description=readme + "\n\n" + history, - author="Hรฅvard Berland", - author_email="havb@equinor.com", - url="https://github.com/equinor/fmu-ensemble", - license="GPLv3", - packages=find_packages("src"), - package_dir={"": "src"}, - include_package_data=True, - install_requires=REQUIREMENTS, - zip_safe=False, - keywords="fmu, ensemble", - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Intended Audience :: Developers", - "Natural Language :: English", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - ], - test_suite="tests", - tests_require=test_requirements, - setup_requires=SETUP_REQUIREMENTS, - extras_require=EXTRAS_REQUIRE, - python_requires=">=3.8", -) diff --git a/src/fmu/ensemble/ensemble.py b/src/fmu/ensemble/ensemble.py index 5928f36c..3120673c 100644 --- a/src/fmu/ensemble/ensemble.py +++ b/src/fmu/ensemble/ensemble.py @@ -112,14 +112,12 @@ def __init__( globbedpaths = [glob.glob(path) for path in paths] globbedpaths = list({item for sublist in globbedpaths for item in sublist}) if not globbedpaths: - if isinstance(runpathfile, str): - if not runpathfile: - logger.warning("Initialized empty ScratchEnsemble") - return - if isinstance(runpathfile, pd.DataFrame): - if runpathfile.empty: - logger.warning("Initialized empty ScratchEnsemble") - return + if isinstance(runpathfile, str) and not runpathfile: + logger.warning("Initialized empty ScratchEnsemble") + return + if isinstance(runpathfile, pd.DataFrame) and runpathfile.empty: + logger.warning("Initialized empty ScratchEnsemble") + return count = None if globbedpaths: @@ -893,9 +891,8 @@ def filter(self, localpath, inplace=True, **kwargs): if inplace: if not realization.contains(localpath, **kwargs): deletethese.append(realidx) - else: - if realization.contains(localpath, **kwargs): - keepthese.append(realidx) + elif realization.contains(localpath, **kwargs): + keepthese.append(realidx) if inplace: logger.info("Removing realizations %s", deletethese) @@ -932,7 +929,7 @@ def drop(self, localpath, **kwargs): if shortcut2path(self.keys(), localpath) not in self.keys(): raise ValueError("%s not found" % localpath) for _, realization in self.realizations.items(): - try: + try: # noqa: SIM105 realization.drop(localpath, **kwargs) except ValueError: pass # Allow localpath to be missing in some realizations @@ -1176,7 +1173,7 @@ def get_wellnames(self, well_match=None): for well in well_match: result = result.union(set(eclsum.wells(well))) - return sorted(list(result)) + return sorted(result) def get_groupnames(self, group_match=None): """ @@ -1213,7 +1210,7 @@ def get_groupnames(self, group_match=None): for group in group_match: result = result.union(set(eclsum.groups(group))) - return sorted(list(result)) + return sorted(result) def agg(self, aggregation, keylist=None, excludekeys=None): """Aggregate the ensemble data into one VirtualRealization diff --git a/src/fmu/ensemble/ensembleset.py b/src/fmu/ensemble/ensembleset.py index 8f3ea240..c03e430e 100644 --- a/src/fmu/ensemble/ensembleset.py +++ b/src/fmu/ensemble/ensembleset.py @@ -439,7 +439,7 @@ def drop(self, localpath, **kwargs): if self.shortcut2path(localpath) not in self.keys(): raise ValueError("%s not found" % localpath) for _, ensemble in self._ensembles.items(): - try: + try: # noqa: SIM105 ensemble.drop(localpath, **kwargs) except ValueError: pass # Allow localpath to be missing in some ensembles. @@ -781,4 +781,4 @@ def get_wellnames(self, well_match=None): result = set() for _, ensemble in self._ensembles.items(): result = result.union(ensemble.get_wellnames(well_match)) - return sorted(list(result)) + return sorted(result) diff --git a/src/fmu/ensemble/observations.py b/src/fmu/ensemble/observations.py index e711a3a2..7d7b571c 100644 --- a/src/fmu/ensemble/observations.py +++ b/src/fmu/ensemble/observations.py @@ -75,7 +75,7 @@ def __init__(self, observations): observations: dict with observation structure or string with path to a yaml file. """ - self.observations = dict() + self.observations = {} if isinstance(observations, str): with open(observations) as yamlfile: @@ -267,20 +267,20 @@ def _realization_mismatch(self, real): measerror = 1 sign = (mismatch > 0) - (mismatch < 0) mismatches.append( - dict( - OBSTYPE=obstype, - OBSKEY=str(obsunit["localpath"]) + { + "OBSTYPE": obstype, + "OBSKEY": str(obsunit["localpath"]) + "/" + str(obsunit["key"]), - LABEL=obsunit.get("label", ""), - MISMATCH=mismatch, - L1=abs(mismatch), - L2=abs(mismatch) ** 2, - SIMVALUE=sim_value, - OBSVALUE=obsunit["value"], - MEASERROR=measerror, - SIGN=sign, - ) + "LABEL": obsunit.get("label", ""), + "MISMATCH": mismatch, + "L1": abs(mismatch), + "L2": abs(mismatch) ** 2, + "SIMVALUE": sim_value, + "OBSVALUE": obsunit["value"], + "MEASERROR": measerror, + "SIGN": sign, + } ) if obstype == "scalar": try: @@ -294,18 +294,18 @@ def _realization_mismatch(self, real): measerror = 1 sign = (mismatch > 0) - (mismatch < 0) mismatches.append( - dict( - OBSTYPE=obstype, - OBSKEY=str(obsunit["key"]), - LABEL=obsunit.get("label", ""), - MISMATCH=mismatch, - L1=abs(mismatch), - SIMVALUE=sim_value, - OBSVALUE=obsunit["value"], - MEASERROR=measerror, - L2=abs(mismatch) ** 2, - SIGN=sign, - ) + { + "OBSTYPE": obstype, + "OBSKEY": str(obsunit["key"]), + "LABEL": obsunit.get("label", ""), + "MISMATCH": mismatch, + "L1": abs(mismatch), + "SIMVALUE": sim_value, + "OBSVALUE": obsunit["value"], + "MEASERROR": measerror, + "L2": abs(mismatch) ** 2, + "SIGN": sign, + } ) if obstype == "smryh": if "time_index" in obsunit: @@ -352,16 +352,16 @@ def _realization_mismatch(self, real): ) measerror = 1 mismatches.append( - dict( - OBSTYPE="smryh", - OBSKEY=obsunit["key"], - LABEL=obsunit.get("label", ""), - MISMATCH=sim_hist["mismatch"].sum(), - MEASERROR=measerror, - L1=sim_hist["mismatch"].abs().sum(), - L2=math.sqrt((sim_hist["mismatch"] ** 2).sum()), - TIME_INDEX=time_index_str, - ) + { + "OBSTYPE": "smryh", + "OBSKEY": obsunit["key"], + "LABEL": obsunit.get("label", ""), + "MISMATCH": sim_hist["mismatch"].sum(), + "MEASERROR": measerror, + "L1": sim_hist["mismatch"].abs().sum(), + "L2": math.sqrt((sim_hist["mismatch"] ** 2).sum()), + "TIME_INDEX": time_index_str, + } ) if obstype == "smry": # For 'smry', there is a list of @@ -381,19 +381,19 @@ def _realization_mismatch(self, real): mismatch = float(sim_value - unit["value"]) sign = (mismatch > 0) - (mismatch < 0) mismatches.append( - dict( - OBSTYPE="smry", - OBSKEY=obsunit["key"], - DATE=unit["date"], - MEASERROR=unit["error"], - LABEL=unit.get("label", ""), - MISMATCH=mismatch, - OBSVALUE=unit["value"], - SIMVALUE=sim_value, - L1=abs(mismatch), - L2=abs(mismatch) ** 2, - SIGN=sign, - ) + { + "OBSTYPE": "smry", + "OBSKEY": obsunit["key"], + "DATE": unit["date"], + "MEASERROR": unit["error"], + "LABEL": unit.get("label", ""), + "MISMATCH": mismatch, + "OBSVALUE": unit["value"], + "SIMVALUE": sim_value, + "L1": abs(mismatch), + "L2": abs(mismatch) ** 2, + "SIGN": sign, + } ) return pd.DataFrame(mismatches) @@ -422,13 +422,12 @@ def _realization_misfit(self, real, defaulterrors=False, corr=None): zeroerrors = mismatch["MEASERROR"] < 1e-7 if defaulterrors: mismatch[zeroerrors]["MEASERROR"] = 1 - else: - if zeroerrors.any(): - print(mismatch[zeroerrors]) - raise ValueError( - "Zero measurement error in observation set" - + ". can't be used to calculate misfit" - ) + elif zeroerrors.any(): + print(mismatch[zeroerrors]) + raise ValueError( + "Zero measurement error in observation set" + + ". can't be used to calculate misfit" + ) if "MISFIT" not in mismatch.columns: mismatch["MISFIT"] = mismatch["L2"] / (mismatch["MEASERROR"] ** 2) @@ -484,8 +483,10 @@ def _clean_observations(self): continue # If time_index is not a supported mnemonic, # parse it to a date object - if "time_index" in unit: - if unit["time_index"] not in [ + if ( + "time_index" in unit + and unit["time_index"] + not in { "raw", "report", "yearly", @@ -493,18 +494,20 @@ def _clean_observations(self): "first", "last", "monthly", - ] and not isinstance(unit["time_index"], datetime.datetime): - try: - unit["time_index"] = dateutil.parser.isoparse( - unit["time_index"] - ).date() - except (TypeError, ValueError) as exception: - logger.warning( - "Parsing date %s failed with error", - (str(unit["time_index"]), str(exception)), - ) - del smryhunits[smryhunits.index(unit)] - continue + } + and not isinstance(unit["time_index"], datetime.datetime) + ): + try: + unit["time_index"] = dateutil.parser.isoparse( + unit["time_index"] + ).date() + except (TypeError, ValueError) as exception: + logger.warning( + "Parsing date %s failed with error", + (str(unit["time_index"]), str(exception)), + ) + del smryhunits[smryhunits.index(unit)] + continue # If everything has been deleted through cleanup, delete the section if not smryhunits: del self.observations["smryh"] diff --git a/src/fmu/ensemble/realization.py b/src/fmu/ensemble/realization.py index ee41a574..669d9ef3 100644 --- a/src/fmu/ensemble/realization.py +++ b/src/fmu/ensemble/realization.py @@ -345,11 +345,11 @@ def load_scalar( value = parse_number(value) if not isinstance(value, str): self.data[localpath] = value - else: - # In case we are re-reading, we must - # ensure there is no value present now: - if localpath in self.data: - del self.data[localpath] + + # In case we are re-reading, we must + # ensure there is no value present now: + elif localpath in self.data: + del self.data[localpath] else: self.data[localpath] = value return value @@ -463,12 +463,9 @@ def load_csv(self, localpath, convert_numeric=True, force_reread=False): [self.files, pd.DataFrame([filerow])], ignore_index=True ) try: - if convert_numeric: - # Trust that Pandas will determine sensible datatypes - # faster than the convert_numeric() function - dtype = None - else: - dtype = str + # Trust that Pandas will determine sensible datatypes + # faster than the convert_numeric() function + dtype = None if convert_numeric else str dframe = pd.read_csv(fullpath, dtype=dtype) if "REAL" in dframe: dframe.rename(columns={"REAL": "REAL_ORIG"}, inplace=True) @@ -706,9 +703,7 @@ def get_df(self, localpath, merge=None): # this function happily returns references to the internal # dataframes in the realization object. So ensure # we copy dataframes if any merging is about to happen. - if isinstance(data, pd.DataFrame): - data = data.copy() - elif isinstance(data, dict): + if isinstance(data, (pd.DataFrame, dict)): data = data.copy() elif isinstance(data, (str, int, float, np.number)): # Convert scalar data into something mergeable @@ -958,9 +953,9 @@ def get_eclsum(self, cache=True, include_restart=True): EclSum: object representing the summary file. None if nothing was found. """ - if cache and self._eclsum: # Return cached object if available - if self._eclsum_include_restart == include_restart: - return self._eclsum + # Return cached object if available + if cache and self._eclsum and self._eclsum_include_restart == include_restart: + return self._eclsum unsmry_file_row = self.files[self.files.FILETYPE == "UNSMRY"] unsmry_filename = None @@ -1368,9 +1363,12 @@ def contains(self, localpath, **kwargs): return False if not kwargs: return localpath in self.keys() - if isinstance(self.data[localpath], dict): - if "key" in kwargs and "value" not in kwargs: - return kwargs["key"] in self.data[localpath] + if ( + isinstance(self.data[localpath], dict) + and "key" in kwargs + and "value" not in kwargs + ): + return kwargs["key"] in self.data[localpath] if isinstance(self.data[localpath], pd.DataFrame): if "key" in kwargs: raise ValueError("Don't use key for tabular data") @@ -1455,10 +1453,7 @@ def drop(self, localpath, **kwargs): def __repr__(self): """Represent the realization. Show only the last part of the path""" pathsummary = self._origpath[-50:] - if self.index is not None: - indexstr = str(self.index) - else: - indexstr = "Error" + indexstr = str(self.index) if self.index is not None else "Error" return "".format(indexstr, pathsummary) def __sub__(self, other): diff --git a/src/fmu/ensemble/util/rates.py b/src/fmu/ensemble/util/rates.py index 2b27e2ca..a2e8e267 100644 --- a/src/fmu/ensemble/util/rates.py +++ b/src/fmu/ensemble/util/rates.py @@ -47,11 +47,8 @@ def compute_volumetric_rates(realization, column_keys, time_index, time_unit): Returns: A dataframe indexed by DATE with cumulative columns. """ - if isinstance(time_unit, str): - if time_unit not in ["days", "months", "years"]: - raise ValueError( - "Unsupported time_unit " + time_unit + " for volumetric rates" - ) + if isinstance(time_unit, str) and time_unit not in {"days", "months", "years"}: + raise ValueError("Unsupported time_unit " + time_unit + " for volumetric rates") # pylint: disable=protected-access column_keys = realization._glob_smry_keys(column_keys) diff --git a/src/fmu/ensemble/virtualensemble.py b/src/fmu/ensemble/virtualensemble.py index 7db04dd5..c9e9eac5 100644 --- a/src/fmu/ensemble/virtualensemble.py +++ b/src/fmu/ensemble/virtualensemble.py @@ -392,10 +392,7 @@ def agg(self, aggregation, keylist=None, excludekeys=None): if not (int in dtypes or float in dtypes): logger.info("No numerical data to aggregate in %s", key) continue - if groupby: - aggobject = data.groupby(groupby) - else: - aggobject = data + aggobject = data.groupby(groupby) if groupby else data if quantilematcher.match(aggregation): quantile = int(quantilematcher.match(aggregation).group(1)) @@ -503,15 +500,14 @@ def prepare_vens_directory(filesystempath, delete=False): logger.info(" - Deleted existing directory") shutil.rmtree(filesystempath) os.mkdir(filesystempath) - else: - if os.listdir(filesystempath): - logger.critical( - ( - "Refusing to write virtual ensemble " - " to non-empty directory" - ) + elif os.listdir(filesystempath): + logger.critical( + ( + "Refusing to write virtual ensemble " + " to non-empty directory" ) - raise IOError("Directory %s not empty" % filesystempath) + ) + raise IOError("Directory %s not empty" % filesystempath) else: os.mkdir(filesystempath) @@ -592,20 +588,16 @@ def prepare_vens_directory(filesystempath, delete=False): for key in self.keys(): dirname = os.path.join(filesystempath, os.path.dirname(key)) - if dirname: - if not os.path.exists(dirname): - os.makedirs(dirname) + if dirname and not os.path.exists(dirname): + os.makedirs(dirname) data = self.get_df(key) filename = os.path.join(dirname, os.path.basename(key)) # Trim .csv from end of dict-key # .csv will be reinstated by logic in from_disk() - if filename[-4:] == ".csv": - filebase = filename[:-4] - else: - # parameters.txt or STATUS ends here: - filebase = filename + # parameters.txt or STATUS ends here + filebase = filename[:-4] if filename[-4:] == ".csv" else filename if not isinstance(data, pd.DataFrame): raise ValueError("VirtualEnsembles should " + "only store DataFrames") @@ -675,9 +667,8 @@ def from_disk(self, filesystempath, fmt="parquet", lazy_load=False): for filename in filenames: # Special treatment of the filename "_name" if filename == "_name": - self._name = "".join( - open(os.path.join(root, filename), "r").readlines() - ).strip() + with open(os.path.join(root, filename), "r") as fhandle: + self._name = "".join(fhandle.readlines()).strip() if filename == "_manifest.yml": self.manifest = os.path.join(root, "_manifest.yml") @@ -732,10 +723,7 @@ def from_disk(self, filesystempath, fmt="parquet", lazy_load=False): self.update_realindices() end_time = datetime.datetime.now() - if lazy_load: - lazy_str = "(lazy) " - else: - lazy_str = "" + lazy_str = "(lazy) " if lazy_load else "" logger.info( "Loading ensemble from disk %stook %g seconds", lazy_str, diff --git a/src/fmu/ensemble/virtualrealization.py b/src/fmu/ensemble/virtualrealization.py index 1dbbcccf..a4b8624e 100644 --- a/src/fmu/ensemble/virtualrealization.py +++ b/src/fmu/ensemble/virtualrealization.py @@ -89,10 +89,9 @@ def to_disk(self, filesystempath, delete=False): if delete: shutil.rmtree(filesystempath) os.mkdir(filesystempath) - else: - if os.listdir(filesystempath): - logger.critical("Refusing to write to non-empty directory") - raise IOError("Directory %s not empty" % filesystempath) + elif os.listdir(filesystempath): + logger.critical("Refusing to write to non-empty directory") + raise IOError("Directory %s not empty" % filesystempath) else: os.mkdir(filesystempath) @@ -107,9 +106,8 @@ def to_disk(self, filesystempath, delete=False): for key in self.keys(): dirname = os.path.join(filesystempath, os.path.dirname(key)) - if dirname: - if not os.path.exists(dirname): - os.makedirs(dirname) + if dirname and not os.path.exists(dirname): + os.makedirs(dirname) data = self.get_df(key) filename = os.path.join(dirname, os.path.basename(key)) @@ -157,9 +155,8 @@ def load_disk(self, filesystempath): for root, _, filenames in os.walk(filesystempath): for filename in filenames: if filename == "_description": - self._description = " ".join( - open(os.path.join(root, filename)).readlines() - ) + with open(os.path.join(root, filename)) as fhandle: + self._description = " ".join(fhandle.readlines()) logger.info("got name as %s", self._description) elif filename == "STATUS": self.append("STATUS", pd.read_csv(os.path.join(root, filename))) diff --git a/test_requirements.txt b/test_requirements.txt deleted file mode 100644 index 51f4dd5d..00000000 --- a/test_requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -pre-commit -pytest>=2.9.2 -pytest-cov -pyyaml>=5.1 diff --git a/tests/test_ensemble.py b/tests/test_ensemble.py index 761fa296..3508809a 100644 --- a/tests/test_ensemble.py +++ b/tests/test_ensemble.py @@ -87,7 +87,7 @@ def test_reek001(tmpdir): assert "NPV" in reekensemble.load_txt("outputs.txt").columns # Check implicit discovery assert "outputs.txt" in reekensemble.files["LOCALPATH"].values - assert all([os.path.isabs(x) for x in reekensemble.files["FULLPATH"]]) + assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"]) # File discovery: csvvolfiles = reekensemble.find_files( @@ -117,7 +117,7 @@ def test_reek001(tmpdir): assert len(newfiles.columns) + 1 == len(csvvolfiles.columns) # FULLPATH should always contain absolute paths - assert all([os.path.isabs(x) for x in reekensemble.files["FULLPATH"]]) + assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"]) # The metadata in the rediscovered files should have been removed assert reekensemble.files[reekensemble.files["GRID"] == "simgrid"].empty @@ -807,7 +807,7 @@ def test_ertrunpathfile(): ) assert len(ens) == 5 - assert all([os.path.isabs(x) for x in ens.files["FULLPATH"]]) + assert all(os.path.isabs(x) for x in ens.files["FULLPATH"]) # Check that the UNSMRY files has been discovered, they should always be # because ECLBASE is given in the runpathfile assert sum(["UNSMRY" in x for x in ens.files["BASENAME"].unique()]) == 5 @@ -850,35 +850,35 @@ def test_eclsumcaching(): ens.load_smry() # Default is to do caching, so these will not be None: - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) + assert all(x._eclsum for (idx, x) in ens.realizations.items()) # If we redo this operation, the same objects should all # be None afterwards: ens.load_smry(cache_eclsum=False) # cache_eclsum==None is from v1.1.5 no longer equivalent to False - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) + assert not any(x._eclsum for (idx, x) in ens.realizations.items()) ens.get_smry() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) + assert all(x._eclsum for (idx, x) in ens.realizations.items()) ens.get_smry(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) + assert not any(x._eclsum for (idx, x) in ens.realizations.items()) ens.get_smry_stats() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) + assert all(x._eclsum for (idx, x) in ens.realizations.items()) ens.get_smry_stats(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) + assert not any(x._eclsum for (idx, x) in ens.realizations.items()) ens.get_smry_dates() - assert all([x._eclsum for (idx, x) in ens.realizations.items()]) + assert all(x._eclsum for (idx, x) in ens.realizations.items()) # Clear the cached objects because the statement above has cached it.. for _, realization in ens.realizations.items(): realization._eclsum = None ens.get_smry_dates(cache_eclsum=False) - assert not any([x._eclsum for (idx, x) in ens.realizations.items()]) + assert not any(x._eclsum for (idx, x) in ens.realizations.items()) def test_filedescriptors(): diff --git a/tests/test_ensemblecombination.py b/tests/test_ensemblecombination.py index e4d0dee8..90c6d033 100644 --- a/tests/test_ensemblecombination.py +++ b/tests/test_ensemblecombination.py @@ -74,7 +74,7 @@ def test_ensemblecombination_basic(): assert not vhalf_filtered2.get_df("unsmry--yearly").empty with pytest.raises((KeyError, ValueError)): # pylint: disable=pointless-statement - vhalf_filtered2.parameters + _ = vhalf_filtered2.parameters # Get summary data with parameters: smry_params = vhalf.get_df("unsmry--yearly", merge="parameters.txt") diff --git a/tests/test_ensembleset.py b/tests/test_ensembleset.py index dfed25bb..3f42f487 100644 --- a/tests/test_ensembleset.py +++ b/tests/test_ensembleset.py @@ -1,5 +1,6 @@ """Testing fmu-ensemble, EnsembleSet class.""" +import contextlib import glob import logging import os @@ -63,10 +64,8 @@ def test_ensembleset_reek001(tmpdir): assert len(ensset["iter-1"].get_df("STATUS")) == 250 # Try adding the same object over again - try: + with contextlib.suppress(ValueError): ensset.add_ensemble(iter0) - except ValueError: - pass assert len(ensset) == 2 # Unchanged! # Initializing nothing, we get warning about the missing name @@ -422,12 +421,10 @@ def test_filestructures(tmpdir): ) os.makedirs(runpath1) os.makedirs(runpath2) - open(os.path.join(runpath1, "parameters.txt"), "w").write( - "REALTIMESITER " + str(real * iterr) + "\n" - ) - open(os.path.join(runpath1, "parameters.txt"), "w").write( - "REALTIMESITERX2 " + str(real * iterr * 2) + "\n" - ) + with open(os.path.join(runpath1, "parameters.txt"), "w") as fhandle: + fhandle.write("REALTIMESITER " + str(real * iterr) + "\n") + with open(os.path.join(runpath2, "parameters.txt"), "w") as fhandle: + fhandle.write("REALTIMESITERX2 " + str(real * iterr * 2) + "\n") # Initializing from this ensemble root should give nothing, # we do not recognize this iter_*/real_* by default @@ -525,23 +522,23 @@ def test_ertrunpathfile(tmp="TMP"): # Also construct an artificial ert runpathfile with iter-0 and iter-1, # by modifying a copy of the runpath for iter-0 - iter0runpath = open(testdir + "/data/ert-runpath-file", "r").readlines() + with open(testdir + "/data/ert-runpath-file", "r") as fhandle: + iter0runpath = fhandle.readlines() if not os.path.exists(tmp): os.mkdir(tmp) - enssetrunpathfile = open(tmp + "/ensset-runpath-file", "w") - print(iter0runpath) - enssetrunpathfile.write("".join(iter0runpath)) - for line in iter0runpath: - (real, path, eclname, _) = line.split() - enssetrunpathfile.write(real + " ") # CHECK THIS! - # Could the first column just be the line number? - # Iterate on the ERT official doc when determined. - enssetrunpathfile.write(path.replace("iter-0", "iter-1") + " ") - enssetrunpathfile.write(eclname + " ") - enssetrunpathfile.write("001" + "\n") - enssetrunpathfile.close() + with open(tmp + "/ensset-runpath-file", "w") as enssetrunpathfile: + print(iter0runpath) + enssetrunpathfile.write("".join(iter0runpath)) + for line in iter0runpath: + (real, path, eclname, _) = line.split() + enssetrunpathfile.write(real + " ") # CHECK THIS! + # Could the first column just be the line number? + # Iterate on the ERT official doc when determined. + enssetrunpathfile.write(path.replace("iter-0", "iter-1") + " ") + enssetrunpathfile.write(eclname + " ") + enssetrunpathfile.write("001" + "\n") ensset = EnsembleSet("ensfromrunpath", runpathfile=tmp + "/ensset-runpath-file") assert len(ensset) == 2 diff --git a/tests/test_observations.py b/tests/test_observations.py index c7299b40..15b3dd7d 100644 --- a/tests/test_observations.py +++ b/tests/test_observations.py @@ -275,18 +275,18 @@ def test_errormessages(): Observations(3) # Unsupported observation category, this foobar will be wiped - emptyobs = Observations(dict(foobar="foo")) + emptyobs = Observations({"foobar": "foo"}) assert emptyobs.empty # (there will be logged a warning) # Empty observation set should be ok, but it must be a dict - empty2 = Observations(dict()) + empty2 = Observations({}) assert empty2.empty with pytest.raises(ValueError): Observations([]) # Check that the dict is a dict of lists: - assert Observations(dict(smry="not_a_list")).empty + assert Observations({"smry": "not_a_list"}).empty # (warning will be printed) # This should give a warning because 'observation' is missing diff --git a/tests/test_realization.py b/tests/test_realization.py index 91c490b5..6fce798d 100644 --- a/tests/test_realization.py +++ b/tests/test_realization.py @@ -114,7 +114,7 @@ def test_single_realization(tmpdir): assert "emptyscalarfile" in real.files["LOCALPATH"].values # Check that FULLPATH always has absolute paths - assert all([os.path.isabs(x) for x in real.files["FULLPATH"]]) + assert all(os.path.isabs(x) for x in real.files["FULLPATH"]) with pytest.raises(IOError): real.load_scalar("notexisting.txt") @@ -328,7 +328,8 @@ def test_volumetric_rates(): # Pick 10 **random** dates to get the volumetric rates between: daily_dates = real.get_smry_dates(freq="daily", normalize=False) - subset_dates = np.random.choice(daily_dates, size=10, replace=False) + rng = np.random.default_rng() + subset_dates = rng.choice(daily_dates, size=10, replace=False) subset_dates.sort() dcum = real.get_smry(column_keys="FOPT", time_index=subset_dates) ddcum = real.get_volumetric_rates(column_keys="FOPT", time_index=subset_dates) @@ -612,7 +613,7 @@ def test_singlereal_ecl(tmp="TMP"): with pytest.raises((ValueError, KeyError)): # This does not exist before we have asked for it # pylint: disable=pointless-statement - "FOPT" in real["unsmry--yearly"] + _ = "FOPT" in real["unsmry--yearly"] def test_can_import_summary_files_beyond_2262(tmpdir, monkeypatch): @@ -798,8 +799,8 @@ def test_filesystem_changes(): # Should not fail # Try with an empty STATUS file: - fhandle = open(realdir + "/STATUS", "w") - fhandle.close() + with open(realdir + "/STATUS", "w") as fhandle: + pass real = ensemble.ScratchRealization(realdir) assert real.get_df("STATUS").empty # This demonstrates we can fool the Realization object, and @@ -807,35 +808,33 @@ def test_filesystem_changes(): # Try with a STATUS file with error message on first job # the situation where there is one successful job. - fhandle = open(realdir + "/STATUS", "w") - fhandle.write( - ( - "Current host : st-rst16-02-03/x86_64 " - "file-server:10.14.10.238\n" - "LSF JOBID: not running LSF\n" - "COPY_FILE : 20:58:57 .... 20:59:00 " - "EXIT: 1/Executable: /project/res/komodo/2018.02/root/etc/ERT/" - "Config/jobs/util/script/copy_file.py failed with exit code: 1\n" + with open(realdir + "/STATUS", "w") as fhandle: + fhandle.write( + ( + "Current host : st-rst16-02-03/x86_64 " + "file-server:10.14.10.238\n" + "LSF JOBID: not running LSF\n" + "COPY_FILE : 20:58:57 .... 20:59:00 " + "EXIT: 1/Executable: /project/res/komodo/2018.02/root/etc/ERT/" + "Config/jobs/util/script/copy_file.py failed with exit code: 1\n" + ) ) - ) - fhandle.close() real = ensemble.ScratchRealization(realdir) # When issue 37 is resolved, update this to 1 and check the # error message is picked up. assert len(real.get_df("STATUS")) == 1 - fhandle = open(realdir + "/STATUS", "w") - fhandle.write( - ( - "Current host : st-rst16-02-03/x86_64 " - "file-server:10.14.10.238\n" - "LSF JOBID: not running LSF\n" - "COPY_FILE : 20:58:55 .... 20:58:57\n" - "COPY_FILE : 20:58:57 .... 20:59:00 " - " EXIT: 1/Executable: /project/res/komodo/2018.02/root/etc/ERT/" - "Config/jobs/util/script/copy_file.py failed with exit code: 1 " + with open(realdir + "/STATUS", "w") as fhandle: + fhandle.write( + ( + "Current host : st-rst16-02-03/x86_64 " + "file-server:10.14.10.238\n" + "LSF JOBID: not running LSF\n" + "COPY_FILE : 20:58:55 .... 20:58:57\n" + "COPY_FILE : 20:58:57 .... 20:59:00 " + " EXIT: 1/Executable: /project/res/komodo/2018.02/root/etc/ERT/" + "Config/jobs/util/script/copy_file.py failed with exit code: 1 " + ) ) - ) - fhandle.close() real = ensemble.ScratchRealization(realdir) assert len(real.get_df("STATUS")) == 2 # Check that we have the error string picked up: @@ -865,10 +864,9 @@ def test_filesystem_changes(): # Unquoted valued with spaces will be truncated, # quoted valued will be correctly parsed # (read_csv(sep='\s+') is the parser) - param_file = open(realdir + "/parameters.txt", "a") - param_file.write("FOOBAR 1 2 3 4 5 6\n") - param_file.write('FOOSPACES "1 2 3 4 5 6"\n') - param_file.close() + with open(realdir + "/parameters.txt", "a") as param_file: + param_file.write("FOOBAR 1 2 3 4 5 6\n") + param_file.write('FOOSPACES "1 2 3 4 5 6"\n') real = ensemble.ScratchRealization(realdir) assert real.parameters["FOOBAR"] == 1 @@ -1069,7 +1067,7 @@ def test_find_files_yml(): fileh.write("baah") yamlfile = "." + filename + ".yml" with open(os.path.join(realdir, yamlfile), "w") as fileh: - fileh.write(yaml.dump(dict(a=dict(x=1, y=2), b="bar"))) + fileh.write(yaml.dump({"a": {"x": 1, "y": 2}, "b": "bar"})) # Now find the gri files, and add metadata: files_df = real.find_files("*.gri", metayaml=True) @@ -1120,7 +1118,7 @@ def test_get_smry_meta(): # Can create dataframes like this: meta_df = pd.DataFrame.from_dict(meta, orient="index") hist_keys = meta_df[meta_df["is_historical"]].index - assert all([key.split(":")[0].endswith("H") for key in hist_keys]) + assert all(key.split(":")[0].endswith("H") for key in hist_keys) # When virtualizing a realization, smry data must be loaded # for smry metadata to be conserved @@ -1197,7 +1195,7 @@ def test_get_df_merge(): assert "top_structure" in scalar_dict # Inject a random dict and merge with: - real.data["foodict"] = dict(BAR="COM") + real.data["foodict"] = {"BAR": "COM"} dframe = real.get_df("parameters", merge="foodict") assert "BAR" in dframe assert "SORG1" in dframe diff --git a/tests/test_virtualensemble.py b/tests/test_virtualensemble.py index 9a522fcc..bbbdc32c 100644 --- a/tests/test_virtualensemble.py +++ b/tests/test_virtualensemble.py @@ -312,21 +312,26 @@ def test_todisk(tmpdir): assert set(vens.keys()) == set(fromcsvdisk2.keys()) # Test manual intervention: - fooframe = pd.DataFrame(data=np.random.randn(3, 3), columns=["FOO", "BAR", "COM"]) + rng = np.random.default_rng() + fooframe = pd.DataFrame( + data=rng.standard_normal(size=(3, 3)), columns=["FOO", "BAR", "COM"] + ) fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv")) manualens = VirtualEnsemble(fromdisk="vens_dumped") assert "share/results/tables/randomdata.csv" not in manualens.keys() # Now with correct column header, # but floating point data for realizations.. - fooframe = pd.DataFrame(data=np.random.randn(3, 3), columns=["REAL", "BAR", "COM"]) + fooframe = pd.DataFrame( + data=rng.standard_normal(size=(3, 3)), columns=["REAL", "BAR", "COM"] + ) fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv")) manualens = VirtualEnsemble(fromdisk="vens_dumped") assert "share/results/tables/randomdata.csv" not in manualens.keys() # Now with correct column header, and with integer data for REAL.. fooframe = pd.DataFrame( - data=np.random.randint(low=0, high=100, size=(3, 3)), + data=rng.integers(low=0, high=100, size=(3, 3)), columns=["REAL", "BAR", "COM"], ) fooframe.to_csv(os.path.join("vens_dumped", "share/results/tables/randomdata.csv")) diff --git a/tests/test_virtualrealization.py b/tests/test_virtualrealization.py index c33f1cfe..0a70ffa7 100644 --- a/tests/test_virtualrealization.py +++ b/tests/test_virtualrealization.py @@ -352,7 +352,7 @@ def test_glob_smry_keys(): assert len(vreal._glob_smry_keys(["FOP*"])) == 9 assert len(vreal._glob_smry_keys("WOPT:*")) == 8 - assert all([x.startswith("WOPT:") for x in vreal._glob_smry_keys("WOPT:*")]) + assert all(x.startswith("WOPT:") for x in vreal._glob_smry_keys("WOPT:*")) assert not vreal._glob_smry_keys("FOOBAR")