feat(python)!: Remove serde functionality from pl.read_json and `DataFrame.write_json` (#16550)
stinodego authored Jun 4, 2024
1 parent f7f7d07 commit b61d4e6
Showing 7 changed files with 65 additions and 161 deletions.
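In practical terms, `DataFrame.write_json` now always emits row-oriented JSON and takes only an optional file target, while the polars-native format moves to `serialize`/`deserialize`. A minimal sketch of the post-change behavior, reusing the values from the updated docstring example below:

```python
import polars as pl

df = pl.DataFrame({"foo": [1, 2, 3], "bar": [6, 7, 8]})

# `row_oriented` and `pretty` are gone; the output is always row-oriented JSON,
# returned as a string when no file target is given.
assert df.write_json() == '[{"foo":1,"bar":6},{"foo":2,"bar":7},{"foo":3,"bar":8}]'
```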
3 changes: 3 additions & 0 deletions docs/src/python/user-guide/io/json.py
@@ -19,6 +19,9 @@
df.write_json("docs/data/path.json")
# --8<-- [end:write]

"""
# --8<-- [start:scan]
df = pl.scan_ndjson("docs/data/path.json")
# --8<-- [end:scan]
"""
64 changes: 10 additions & 54 deletions py-polars/polars/dataframe/frame.py
@@ -2295,30 +2295,12 @@ def serialize_to_string() -> str:
return None

@overload
def write_json(
self,
file: None = ...,
*,
row_oriented: bool = ...,
pretty: bool | None = ...,
) -> str: ...
def write_json(self, file: None = ...) -> str: ...

@overload
def write_json(
self,
file: IOBase | str | Path,
*,
row_oriented: bool = ...,
pretty: bool | None = ...,
) -> None: ...
def write_json(self, file: IOBase | str | Path) -> None: ...

def write_json(
self,
file: IOBase | str | Path | None = None,
*,
row_oriented: bool = False,
pretty: bool | None = None,
) -> str | None:
def write_json(self, file: IOBase | str | Path | None = None) -> str | None:
"""
Serialize to JSON representation.
@@ -2327,17 +2309,6 @@ def write_json(
file
File path or writable file-like object to which the result will be written.
If set to `None` (default), the output is returned as a string instead.
row_oriented
Write to row oriented json. This is slower, but more common.
pretty
Pretty serialize json.
.. deprecated:: 0.20.31
The `pretty` functionality for `write_json` will be removed in the next
breaking release. Use :meth:`serialize` to serialize the DataFrame in
the regular JSON format.
See Also
--------
@@ -2351,43 +2322,28 @@
... "bar": [6, 7, 8],
... }
... )
>>> df.write_json(row_oriented=True)
>>> df.write_json()
'[{"foo":1,"bar":6},{"foo":2,"bar":7},{"foo":3,"bar":8}]'
"""
if pretty is not None:
issue_deprecation_warning(
"The `pretty` functionality for `write_json` will be removed in the next breaking release."
" Use `DataFrame.serialize` to serialize the DataFrame in the regular JSON format.",
version="0.20.31",
)
else:
pretty = False

if not row_oriented:
issue_deprecation_warning(
"`DataFrame.write_json` will only write row-oriented JSON in the next breaking release."
" Use `DataFrame.serialize` instead.",
version="0.20.31",
)

def write_json_to_string(*, pretty: bool, row_oriented: bool) -> str:
def write_json_to_string() -> str:
with BytesIO() as buf:
self._df.write_json_old(buf, pretty=pretty, row_oriented=row_oriented)
self._df.write_json(buf)
json_bytes = buf.getvalue()
return json_bytes.decode("utf8")

if file is None:
return write_json_to_string(pretty=pretty, row_oriented=row_oriented)
return write_json_to_string()
elif isinstance(file, StringIO):
json_str = write_json_to_string(pretty=pretty, row_oriented=row_oriented)
json_str = write_json_to_string()
file.write(json_str)
return None
elif isinstance(file, (str, Path)):
file = normalize_filepath(file)
self._df.write_json_old(file, pretty=pretty, row_oriented=row_oriented)
self._df.write_json(file)
return None
else:
self._df.write_json_old(file, pretty=pretty, row_oriented=row_oriented)
self._df.write_json(file)
return None

@overload
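The simplified dispatch above keeps the same set of file targets. A usage sketch, assuming this version of the API (the output path is illustrative):

```python
import io
from pathlib import Path

import polars as pl

df = pl.DataFrame({"foo": [1, 2, 3], "bar": [6, 7, 8]})

# file=None (default): the row-oriented JSON is returned as a string.
json_str = df.write_json()

# StringIO: the JSON string is written into the text buffer.
buf = io.StringIO()
df.write_json(buf)

# str or Path: the path is normalized and handed to the Rust writer.
df.write_json(Path("frame.json"))  # illustrative output path
```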
3 changes: 0 additions & 3 deletions py-polars/pyproject.toml
@@ -238,9 +238,6 @@ filterwarnings = [
# TODO: Remove when behavior is updated
# https://github.com/pola-rs/polars/issues/13441
"ignore:.*default coalesce behavior of left join.*:DeprecationWarning",
# TODO: Remove when default is updated
# https://github.com/pola-rs/polars/issues/14526
"ignore:.*will only write row-oriented JSON.*:DeprecationWarning",
]
xfail_strict = true

68 changes: 15 additions & 53 deletions py-polars/src/dataframe/io.rs
@@ -215,44 +215,27 @@ impl PyDataFrame {
schema: Option<Wrap<Schema>>,
schema_overrides: Option<Wrap<Schema>>,
) -> PyResult<Self> {
// memmap the file first.

use crate::file::read_if_bytesio;
py_f = read_if_bytesio(py_f);
let mmap_bytes_r = get_mmap_bytes_reader(&py_f)?;

py.allow_threads(move || {
let mmap_read: ReaderBytes = (&mmap_bytes_r).into();
let bytes = mmap_read.deref();
// Happy path is our column oriented json as that is most performant,
// on failure we try the arrow json reader instead, which is row-oriented.
match serde_json::from_slice::<DataFrame>(bytes) {
Ok(df) => Ok(df.into()),
Err(e) => {
let msg = format!("{e}");
if msg.contains("successful parse invalid data") {
let e = PyPolarsErr::from(PolarsError::ComputeError(msg.into()));
Err(PyErr::from(e))
} else {
let mut builder = JsonReader::new(mmap_bytes_r)
.with_json_format(JsonFormat::Json)
.infer_schema_len(infer_schema_length);

if let Some(schema) = schema {
builder = builder.with_schema(Arc::new(schema.0));
}

if let Some(schema) = schema_overrides.as_ref() {
builder = builder.with_schema_overwrite(&schema.0);
}

let out = builder
.finish()
.map_err(|e| PyPolarsErr::Other(format!("{e}")))?;
Ok(out.into())
}
},
let mut builder = JsonReader::new(mmap_bytes_r)
.with_json_format(JsonFormat::Json)
.infer_schema_len(infer_schema_length);

if let Some(schema) = schema {
builder = builder.with_schema(Arc::new(schema.0));
}

if let Some(schema) = schema_overrides.as_ref() {
builder = builder.with_schema_overwrite(&schema.0);
}

let out = builder
.finish()
.map_err(|e| PyPolarsErr::Other(format!("{e}")))?;
Ok(out.into())
})
}

@@ -493,27 +476,6 @@ impl PyDataFrame {
.map_err(|e| PyPolarsErr::Other(format!("{e}")).into())
}

/// This method can be removed entirely in the next breaking release.
#[cfg(feature = "json")]
pub fn write_json_old(
&mut self,
py_f: PyObject,
pretty: bool,
row_oriented: bool,
) -> PyResult<()> {
match (pretty, row_oriented) {
(_, true) => self.write_json(py_f),
(false, _) => self.serialize(py_f),
(true, _) => {
let file = BufWriter::new(get_file_like(py_f, true)?);

serde_json::to_writer_pretty(file, &self.df)
.map_err(|e| polars_err!(ComputeError: "{e}"))
.map_err(|e| PyPolarsErr::Other(format!("{e}")).into())
},
}
}

#[cfg(feature = "json")]
pub fn write_ndjson(&mut self, py_f: PyObject) -> PyResult<()> {
let file = BufWriter::new(get_file_like(py_f, true)?);
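With the column-oriented happy path removed, `pl.read_json` always goes through the arrow JSON reader built above. A hedged sketch of reading record-oriented JSON with a schema override (the sample data is illustrative):

```python
import io

import polars as pl

data = b'[{"a": 1, "b": "x"}, {"a": 2, "b": "y"}]'

# BytesIO input is unwrapped by read_if_bytesio before the reader is built;
# schema_overrides maps onto with_schema_overwrite in the builder above.
df = pl.read_json(io.BytesIO(data), schema_overrides={"a": pl.UInt8})
```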
41 changes: 29 additions & 12 deletions py-polars/tests/unit/dataframe/test_serde.py
@@ -146,15 +146,32 @@ def test_json_deserialize_empty_list_10458() -> None:
assert df.schema == schema


def test_df_write_json_deprecated() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
with pytest.deprecated_call():
result = df.write_json()
assert result == df.serialize()


def test_df_write_json_pretty_deprecated() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
with pytest.deprecated_call():
result = df.write_json(pretty=True)
assert isinstance(result, str)
def test_serde_validation() -> None:
f = io.StringIO(
"""
{
"columns": [
{
"name": "a",
"datatype": "Int64",
"values": [
1,
2
]
},
{
"name": "b",
"datatype": "Int64",
"values": [
1
]
}
]
}
"""
)
with pytest.raises(
pl.ComputeError,
match=r"lengths don't match",
):
pl.DataFrame.deserialize(f)
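For the native format that `write_json` no longer produces, the `serialize`/`deserialize` pair exercised by the relocated test above handles the round trip. A minimal sketch of the happy path:

```python
import io

import polars as pl

df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})

# serialize() returns the column-oriented payload that deserialize expects;
# mismatched column lengths raise the ComputeError checked in the test above.
payload = df.serialize()
restored = pl.DataFrame.deserialize(io.StringIO(payload))
assert restored.equals(df)
```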
16 changes: 8 additions & 8 deletions py-polars/tests/unit/io/test_json.py
@@ -11,9 +11,9 @@
from polars.testing import assert_frame_equal


def test_write_json_row_oriented() -> None:
def test_write_json() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", None]})
out = df.write_json(row_oriented=True)
out = df.write_json()
assert out == '[{"a":1,"b":"a"},{"a":2,"b":"b"},{"a":3,"b":null}]'

# Test round trip
@@ -27,11 +27,10 @@ def test_write_json_row_oriented() -> None:
def test_write_json_categoricals() -> None:
data = {"column": ["test1", "test2", "test3", "test4"]}
df = pl.DataFrame(data).with_columns(pl.col("column").cast(pl.Categorical))

assert (
df.write_json(row_oriented=True, file=None)
== '[{"column":"test1"},{"column":"test2"},{"column":"test3"},{"column":"test4"}]'
expected = (
'[{"column":"test1"},{"column":"test2"},{"column":"test3"},{"column":"test4"}]'
)
assert df.write_json() == expected


def test_write_json_duration() -> None:
@@ -44,8 +43,9 @@ def test_write_json_duration() -> None:
)

# we don't guarantee a format, just round-circling
value = str(df.write_json(row_oriented=True))
assert value == """[{"a":"PT91762.939S"},{"a":"PT91762.89S"},{"a":"PT6020.836S"}]"""
value = df.write_json()
expected = '[{"a":"PT91762.939S"},{"a":"PT91762.89S"},{"a":"PT6020.836S"}]'
assert value == expected


def test_json_infer_schema_length_11148() -> None:
Expand Down
31 changes: 0 additions & 31 deletions py-polars/tests/unit/test_errors.py
@@ -538,37 +538,6 @@ def test_invalid_group_by_arg() -> None:
df.group_by(1).agg({"a": "sum"})


def test_serde_validation() -> None:
f = io.StringIO(
"""
{
"columns": [
{
"name": "a",
"datatype": "Int64",
"values": [
1,
2
]
},
{
"name": "b",
"datatype": "Int64",
"values": [
1
]
}
]
}
"""
)
with pytest.raises(
pl.ComputeError,
match=r"lengths don't match",
):
pl.read_json(f)


def test_overflow_msg() -> None:
with pytest.raises(
pl.ComputeError,
