TST: Resolve pandas warnings emitted during tests
This resolves a few pandas warnings about how dataframe columns are accessed and modified. Removing the
`inplace=True` qualifier seems undesirable despite the warning being
emitted for it, but it turns out that when this option is given, pandas
generally creates a copy internally anyway. pandas also appears to have a
general desire to remove the `inplace=` options everywhere, so that code
does not mutate in place and stays closer to an immutable style.

See the following comment from several years ago:
pandas-dev/pandas#16529 (comment)
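
The pattern applied throughout this commit can be illustrated with a minimal, hypothetical sketch (the column name and values below are made up, not taken from the repository). Recent pandas versions warn when `replace`/`fillna` is called with `inplace=True` on a column selected out of a DataFrame, since the call may act on an intermediate copy; reassigning the returned result, or writing through `.loc`, expresses the same operation without the warning:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"ZONE": [1.0, np.nan, -1.0]})  # hypothetical example data

# Old style: an inplace method on a selected column can trigger a
# FutureWarning in recent pandas, and pandas typically works on an
# internal copy here anyway:
#   df["ZONE"].fillna(-999, inplace=True)

# Preferred: reassign the returned (copied) result ...
df["ZONE"] = df["ZONE"].fillna(-999)

# ... or write through .loc when a boolean mask selects the rows to change.
df.loc[df["ZONE"] < 0, "ZONE"] = -1
```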
mferrera committed Jul 10, 2024
1 parent 2cfc25e commit 02481d7
Showing 7 changed files with 53 additions and 35 deletions.
8 changes: 4 additions & 4 deletions src/xtgeo/grid3d/_grid_etc1.py
@@ -424,17 +424,17 @@ def get_ijk_from_points(
proplist[columnnames[2]] = karr

mydataframe = pd.DataFrame.from_dict(proplist)
mydataframe.replace(UNDEF_INT, -1, inplace=True)
mydataframe = mydataframe.replace(UNDEF_INT, -1)

if fmt == "float":
mydataframe[columnnames[0]] = mydataframe[columnnames[0]].astype("float")
mydataframe[columnnames[1]] = mydataframe[columnnames[1]].astype("float")
mydataframe[columnnames[2]] = mydataframe[columnnames[2]].astype("float")

if undef != -1:
mydataframe[columnnames[0]].replace(-1, undef, inplace=True)
mydataframe[columnnames[1]].replace(-1, undef, inplace=True)
mydataframe[columnnames[2]].replace(-1, undef, inplace=True)
mydataframe[columnnames[0]] = mydataframe[columnnames[0]].replace(-1, undef)
mydataframe[columnnames[1]] = mydataframe[columnnames[1]].replace(-1, undef)
mydataframe[columnnames[2]] = mydataframe[columnnames[2]].replace(-1, undef)

if dataframe:
return mydataframe
8 changes: 4 additions & 4 deletions src/xtgeo/grid3d/_grid_wellzone.py
@@ -102,19 +102,19 @@ def report_zone_mismatch(

for zname in (zonelogname, zmodel):
if skiprange: # needed check; du to a bug in pandas version 0.21 .. 0.23
df[zname].replace(skiprange, -888, inplace=True)
df[zname].fillna(-999, inplace=True)
df[zname] = df[zname].replace(skiprange, -888)
df[zname] = df[zname].fillna(-999)
if perflogname:
if perflogname in df.columns:
df[perflogname].replace(np.nan, -1, inplace=True)
df[perflogname] = df[perflogname].replace(np.nan, -1)
pfr1, pfr2 = perflogrange
df[zname] = np.where(df[perflogname] < pfr1, -899, df[zname])
df[zname] = np.where(df[perflogname] > pfr2, -899, df[zname])
else:
return None
if filterlogname:
if filterlogname in df.columns:
df[filterlogname].replace(np.nan, -1, inplace=True)
df[filterlogname] = df[filterlogname].replace(np.nan, -1)
ffr1, ffr2 = filterlogrange
df[zname] = np.where(df[filterlogname] < ffr1, -919, df[zname])
df[zname] = np.where(df[filterlogname] > ffr2, -919, df[zname])
5 changes: 2 additions & 3 deletions src/xtgeo/well/_well_io.py
@@ -109,7 +109,7 @@ def import_rms_ascii(

dfr = pd.read_csv(
wfile.file,
delim_whitespace=True,
sep=r"\s+",
skiprows=lnum,
header=None,
names=xlognames_all,
@@ -220,8 +220,7 @@ def export_rms_ascii(self, wfile, precision=4):
print(f"{lname} {self.get_logtype(lname)} {usewrec}", file=fwell)

# now export all logs as pandas framework
tmpdf = self._wdata.data.copy()
tmpdf.fillna(value=-999, inplace=True)
tmpdf = self._wdata.data.copy().fillna(value=-999)

# make the disc as is np.int
for lname in self.wlogtypes:
9 changes: 4 additions & 5 deletions src/xtgeo/xyz/_xyz_data.py
@@ -300,15 +300,14 @@ def _ensure_consistency_df_dtypes(self):
for name, attr_type in self._attr_types.items():
if attr_type == _AttrType.CONT.value:
logger.debug("Replacing CONT undef...")
self._df[name].replace(
self._df.loc[:, name] = self._df[name].replace(
self._undef_cont,
np.float64(UNDEF_CONT).astype(self._floatbits),
inplace=True,
)
else:
logger.debug("Replacing INT undef...")
self._df[name].replace(
self._undef_disc, np.int32(UNDEF_DISC), inplace=True
self._df.loc[:, name] = self._df[name].replace(
self._undef_disc, np.int32(UNDEF_DISC)
)
logger.info("Processed dataframe: %s", list(self._df.dtypes))

@@ -579,7 +578,7 @@ def create_relative_hlen(self):
distance.append(math.hypot((previous_x - x), (y - previous_y)))
previous_x, previous_y = x, y

self._df[_AttrName.R_HLEN_NAME.value] = pd.Series(
self._df.loc[:, _AttrName.R_HLEN_NAME.value] = pd.Series(
np.cumsum(distance), index=self._df.index
)
self.ensure_consistency()
24 changes: 15 additions & 9 deletions src/xtgeo/xyz/_xyz_io.py
@@ -1,5 +1,7 @@
"""Private import and export routines for XYZ stuff."""

import contextlib

import numpy as np
import pandas as pd

@@ -19,7 +21,7 @@ def import_xyz(pfile, zname="Z_TVDSS"):
"yname": "Y_UTMN",
"values": pd.read_csv(
pfile.file,
delim_whitespace=True,
sep=r"\s+",
skiprows=0,
header=None,
names=["X_UTME", "Y_UTMN", zname],
@@ -70,7 +72,7 @@ def import_zmap(pfile, zname="Z_TVDSS"):

df = pd.read_csv(
pfile.file,
delim_whitespace=True,
sep=r"\s+",
skiprows=16,
header=None,
names=[xname, yname, zname, pname],
@@ -150,20 +152,24 @@ def import_rms_attr(pfile, zname="Z_TVDSS"):

dfr = pd.read_csv(
pfile.file,
delim_whitespace=True,
sep=r"\s+",
skiprows=skiprows,
header=None,
names=names,
dtype=dtypes,
)
for col in dfr.columns[3:]:
if col in _attrs:
# pandas gives a FutureWarning here due to casting what was
# previously a string to a float/int.
if _attrs[col] == "float":
dfr[col].replace("UNDEF", UNDEF, inplace=True)
dfr[col] = dfr[col].replace("UNDEF", UNDEF).astype(float)
elif _attrs[col] == "int":
dfr[col].replace("UNDEF", UNDEF_INT, inplace=True)
# cast to numerical if possible
dfr[col] = pd.to_numeric(dfr[col], errors="ignore")
dfr[col] = dfr[col].replace("UNDEF", UNDEF_INT).astype(int)

# cast to numerical if possible
with contextlib.suppress(ValueError, TypeError):
dfr[col] = pd.to_numeric(dfr[col])

kwargs["values"] = dfr
kwargs["attributes"] = _attrs
@@ -314,9 +320,9 @@ def export_rms_attr(self, pfile, attributes=True, pfilter=None, ispolygons=False
if col in df.columns:
fout.write(transl[self._attrs[col]] + " " + col + "\n")
if self._attrs[col] == "int":
df[col].replace(UNDEF_INT, "UNDEF", inplace=True)
df[col] = df[col].replace(UNDEF_INT, "UNDEF")
elif self._attrs[col] == "float":
df[col].replace(UNDEF, "UNDEF", inplace=True)
df[col] = df[col].replace(UNDEF, "UNDEF")

with open(pfile, mode) as fc:
df.to_csv(fc, sep=" ", header=None, columns=columns, index=False)
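
The changes to this file also replace two options that recent pandas releases deprecate: `delim_whitespace=True` in `read_csv` (superseded by `sep=r"\s+"`) and `errors="ignore"` in `pd.to_numeric` (superseded here by suppressing the conversion error). A small self-contained sketch of both patterns, using made-up inline data rather than the project's actual file formats:

```python
import contextlib
import io

import pandas as pd

# Hypothetical whitespace-delimited input; the real code reads project files.
raw = io.StringIO("X Y ATTR\n1.0 2.0 UNDEF\n3.0 4.0 7\n")

# delim_whitespace=True is deprecated in recent pandas; sep=r"\s+" parses
# whitespace-separated columns the same way.
df = pd.read_csv(raw, sep=r"\s+")

# pd.to_numeric(..., errors="ignore") is deprecated as well; suppressing the
# error keeps the old "convert if possible, otherwise leave as-is" behaviour.
with contextlib.suppress(ValueError, TypeError):
    df["ATTR"] = pd.to_numeric(df["ATTR"])
```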
30 changes: 22 additions & 8 deletions src/xtgeo/xyz/_xyz_oper.py
@@ -147,18 +147,18 @@ def operation_polygons_v2(self, poly, value, opname="add", inside=True, where=Tr
dataframe = self.get_dataframe()

if opname == "add":
dataframe[self.zname][tmpdf._TMP == 1] += value
dataframe.loc[tmpdf._TMP == 1, self.zname] += value
elif opname == "sub":
dataframe[self.zname][tmpdf._TMP == 1] -= value
dataframe.loc[tmpdf._TMP == 1, self.zname] -= value
elif opname == "mul":
dataframe[self.zname][tmpdf._TMP == 1] *= value
dataframe.loc[tmpdf._TMP == 1, self.zname] *= value
elif opname == "div":
if value != 0.0:
dataframe[self.zname][tmpdf._TMP == 1] /= value
dataframe.loc[tmpdf._TMP == 1, self.zname] /= value
else:
dataframe[self.zname][tmpdf._TMP == 1] = 0.0
dataframe.loc[tmpdf._TMP == 1, self.zname] = 0.0
elif opname == "set":
dataframe[self.zname][tmpdf._TMP == 1] = value
dataframe.loc[tmpdf._TMP == 1, self.zname] = value
elif opname == "eli":
dataframe = dataframe[tmpdf._TMP == 0]
dataframe.reset_index(inplace=True, drop=True)
@@ -561,7 +561,14 @@ def extend(self, distance, nsamples, addhlen=True):

# setting row0[2] as row1[2] is intentional, as this shall be a 2D lenght!
ier, newx, newy, _ = _cxtgeo.x_vector_linint2(
row1[0], row1[1], row1[2], row0[0], row0[1], row1[2], distance, 12
row1.iloc[0],
row1.iloc[1],
row1.iloc[2],
row0.iloc[0],
row0.iloc[1],
row1.iloc[2],
distance,
12,
)

if ier != 0:
@@ -582,7 +589,14 @@ def extend(self, distance, nsamples, addhlen=True):

# setting row1[2] as row0[2] is intentional, as this shall be a 2D lenght!
ier, newx, newy, _ = _cxtgeo.x_vector_linint2(
row0[0], row0[1], row0[2], row1[0], row1[1], row0[2], distance, 11
row0.iloc[0],
row0.iloc[1],
row0.iloc[2],
row1.iloc[0],
row1.iloc[1],
row0.iloc[2],
distance,
11,
)

rown[self.xname] = newx
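
The `.loc` and `.iloc` changes above address two related warnings: chained indexing such as `dataframe[col][mask] = value` may modify a temporary copy, and plain integer indexing like `row[0]` on a Series is deprecated for positional access. A rough sketch with a hypothetical frame:

```python
import pandas as pd

df = pd.DataFrame({"Z": [1.0, 2.0, 3.0], "TMP": [0, 1, 1]})  # hypothetical data

# Chained indexing (df["Z"][df["TMP"] == 1] += 10) can act on a temporary copy
# and raises SettingWithCopy / chained-assignment warnings; a single .loc call
# writes to the underlying frame directly.
df.loc[df["TMP"] == 1, "Z"] += 10

# Positional integer access on a Series (row[0]) emits a FutureWarning in
# recent pandas; .iloc makes the positional intent explicit.
row = df.iloc[0]
first_value = row.iloc[0]
```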
4 changes: 2 additions & 2 deletions tests/test_well/test_well.py
@@ -872,8 +872,8 @@ def test_create_surf_distance_log_more(tmp_path, loadwell1, testdata_path):

for zname in (zonelogname, zmodel):
if skiprange: # needed check; du to a bug in pandas version 0.21 .. 0.23
dfr[zname].replace(skiprange, -888, inplace=True)
dfr[zname].fillna(-999, inplace=True)
dfr[zname] = dfr[zname].replace(skiprange, -888)
dfr[zname] = dfr[zname].fillna(-999)
# now there are various variotions on how to count mismatch:
# dfuse 1: count matches when zonelogname is valid (exclude -888)
# dfuse 2: count matches when zonelogname OR zmodel are valid (exclude < -888
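
The repository's test configuration is not shown in this commit, so the following is only an assumed illustration of how such warnings can be kept from reappearing: pytest can escalate `FutureWarning` to an error, either globally in its configuration or per test with the `filterwarnings` marker.

```python
import pandas as pd
import pytest


# Assumed, illustrative test: the marker turns any FutureWarning raised inside
# the test into a failure, so a reintroduced inplace/chained-assignment pattern
# would be caught immediately.
@pytest.mark.filterwarnings("error::FutureWarning")
def test_replace_without_inplace_does_not_warn():
    df = pd.DataFrame({"A": [1, 2]})
    df["A"] = df["A"].replace(1, -1)
    assert df["A"].tolist() == [-1, 2]
```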
