diff --git a/crates/polars-io/src/json/infer.rs b/crates/polars-io/src/json/infer.rs index 0ff83225e97f..cbee672baccc 100644 --- a/crates/polars-io/src/json/infer.rs +++ b/crates/polars-io/src/json/infer.rs @@ -2,7 +2,7 @@ use std::num::NonZeroUsize; use polars_core::prelude::DataType; use polars_core::utils::try_get_supertype; -use polars_error::{polars_bail, PolarsResult}; +use polars_error::{polars_bail, PolarsError, PolarsResult}; use simd_json::BorrowedValue; pub(crate) fn json_values_to_supertype( @@ -10,7 +10,7 @@ pub(crate) fn json_values_to_supertype( infer_schema_len: NonZeroUsize, ) -> PolarsResult { // struct types may have missing fields so find supertype - values + let out_opt: Option> = values .iter() .take(infer_schema_len.into()) .map(|value| polars_json::json::infer(value).map(|dt| DataType::from(&dt))) @@ -18,8 +18,21 @@ pub(crate) fn json_values_to_supertype( let l = l?; let r = r?; try_get_supertype(&l, &r) - }) - .unwrap_or_else(|| polars_bail!(ComputeError: "could not infer data-type")) + }); + match (out_opt, values.len()==0) { + (Some(out), true) => { + match out { + Ok(out)=>Ok(out), + _=>Err(PolarsError::NoData("no data".into())) + } + }, + (Some(out), false) => { + out + }, + (None, true) => Err(PolarsError::NoData("no data".into())), + (None, false) => polars_bail!(ComputeError: "could not infer data-type") + + } } pub(crate) fn dtypes_to_supertype>( diff --git a/crates/polars-io/src/json/mod.rs b/crates/polars-io/src/json/mod.rs index 1a8f9eb8f5a4..1d909bb12dce 100644 --- a/crates/polars-io/src/json/mod.rs +++ b/crates/polars-io/src/json/mod.rs @@ -71,7 +71,7 @@ use std::ops::Deref; use arrow::legacy::conversion::chunk_to_struct; use polars_core::error::to_compute_err; use polars_core::prelude::*; -use polars_error::{polars_bail, PolarsResult}; +use polars_error::{polars_bail, PolarsError, PolarsResult}; use polars_json::json::write::FallibleStreamingIterator; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -274,27 +274,41 @@ where let mut bytes = rb.deref().to_vec(); let json_value = simd_json::to_borrowed_value(&mut bytes).map_err(to_compute_err)?; - - // struct type - let dtype = if let Some(mut schema) = self.schema { + let dtype_result = if let Some(mut schema) = self.schema { if let Some(overwrite) = self.schema_overwrite { let mut_schema = Arc::make_mut(&mut schema); overwrite_schema(mut_schema, overwrite)?; } - - DataType::Struct(schema.iter_fields().collect()).to_arrow(CompatLevel::newest()) + Ok(DataType::Struct(schema.iter_fields().collect()) + .to_arrow(CompatLevel::newest())) } else { // infer - let inner_dtype = if let BorrowedValue::Array(values) = &json_value { - infer::json_values_to_supertype( + let inner_dtype_result = if let BorrowedValue::Array(values) = &json_value { + let supertype = infer::json_values_to_supertype( values, self.infer_schema_len .unwrap_or(NonZeroUsize::new(usize::MAX).unwrap()), - )? - .to_arrow(CompatLevel::newest()) + ); + match supertype { + Ok(supertype) => Ok(supertype.to_arrow(CompatLevel::newest())), + Err(e) => Err(e), + } } else { - polars_json::json::infer(&json_value)? + polars_json::json::infer(&json_value) }; + if inner_dtype_result.is_err() { + match &json_value { + BorrowedValue::Array(array) => { + if array.is_empty() { + return Ok(DataFrame::empty()); + } + }, + _ => { + polars_bail!(ComputeError: "could not infer data-type") + }, + } + } + let inner_dtype = inner_dtype_result?; if let Some(overwrite) = self.schema_overwrite { let ArrowDataType::Struct(fields) = inner_dtype else { @@ -304,18 +318,23 @@ where let mut schema = Schema::from_iter(fields.iter().map(Into::::into)); overwrite_schema(&mut schema, overwrite)?; - DataType::Struct( + Ok(DataType::Struct( schema .into_iter() .map(|(name, dt)| Field::new(name, dt)) .collect(), ) - .to_arrow(CompatLevel::newest()) + .to_arrow(CompatLevel::newest())) } else { - inner_dtype + Ok(inner_dtype) } }; - + if let Err(e) = &dtype_result { + if let PolarsError::NoData(_) = e { + return Ok(DataFrame::empty()); + } + }; + let dtype = dtype_result?; let dtype = if let BorrowedValue::Array(_) = &json_value { ArrowDataType::LargeList(Box::new(arrow::datatypes::Field::new( PlSmallStr::from_static("item"), diff --git a/py-polars/tests/unit/io/test_json.py b/py-polars/tests/unit/io/test_json.py index 4bce4ee4e0ce..6670d0bf6a42 100644 --- a/py-polars/tests/unit/io/test_json.py +++ b/py-polars/tests/unit/io/test_json.py @@ -306,7 +306,7 @@ def test_ndjson_null_inference_13183() -> None: } -@pytest.mark.write_disk +@pytest.mark.write_disk() @typing.no_type_check def test_json_wrong_input_handle_textio(tmp_path: Path) -> None: # this shouldn't be passed, but still we test if we can handle it gracefully @@ -385,3 +385,13 @@ def test_empty_json() -> None: df = pl.read_json(b'{"j":{}}') assert df.dtypes == [pl.Struct([])] assert df.shape == (0, 1) + + +def test_empty_list_json() -> None: + df = pl.read_json(io.StringIO("[]")) + assert df.shape == (0, 0) + assert isinstance(df, pl.DataFrame) + + df = pl.read_json(b"[]") + assert df.shape == (0, 0) + assert isinstance(df, pl.DataFrame)