ENH: allow JSON (de)serialization of ExtensionDtypes #44722

Merged
7 commits merged on Dec 19, 2021
Changes from 2 commits
2 changes: 1 addition & 1 deletion doc/source/development/developer.rst
Original file line number Diff line number Diff line change
@@ -180,7 +180,7 @@ As an example of fully-formed metadata:
'numpy_type': 'int64',
'metadata': None}
],
'pandas_version': '0.20.0',
'pandas_version': '1.4.0',
'creator': {
'library': 'pyarrow',
'version': '0.13.0'
17 changes: 17 additions & 0 deletions doc/source/user_guide/io.rst
@@ -1903,6 +1903,7 @@ with optional parameters:
``index``; dict like {index -> {column -> value}}
``columns``; dict like {column -> {index -> value}}
``values``; just the values array
``table``; adhering to the JSON `Table Schema`_

* ``date_format`` : string, type of date conversion, 'epoch' for timestamp, 'iso' for ISO8601.
* ``double_precision`` : The number of decimal places to use when encoding floating point values, default 10.
@@ -1919,6 +1920,18 @@ Note ``NaN``'s, ``NaT``'s and ``None`` will be converted to ``null`` and ``datet
json = dfj.to_json()
json

.. note::

   When using ``orient='table'`` along with a user-defined ``ExtensionArray``,
the generated schema will contain an additional ``extDtype`` key in the respective
``fields`` element. This extra key is not standard but does enable JSON roundtrips
for extension types (e.g. ``read_json(df.to_json(orient="table"), orient="table")``).

   The ``extDtype`` key carries the name of the extension. If you have properly
   registered the ``ExtensionDtype``, pandas will use that name to look the dtype
   up in the registry and re-convert the serialized data into your custom dtype.
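The roundtrip described in the note can be sketched with pandas' built-in nullable ``Int64`` dtype, which is already registered in the ``ExtensionDtype`` registry (a minimal sketch assuming pandas >= 1.4; any properly registered extension dtype behaves the same way):

```python
from io import StringIO

import pandas as pd

# "Int64" is a registered ExtensionDtype, so orient="table" writes an
# "extDtype" key into the schema and read_json can recover the dtype.
df = pd.DataFrame({"a": pd.array([1, 2, 3], dtype="Int64")})
payload = df.to_json(orient="table")
roundtripped = pd.read_json(StringIO(payload), orient="table")
print(roundtripped["a"].dtype)  # Int64
```

Without the ``extDtype`` key, the column would come back as a plain NumPy dtype.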


Orient options
++++++++++++++

@@ -2477,6 +2490,10 @@ A few notes on the generated table schema:
* For ``MultiIndex``, ``mi.names`` is used. If any level has no name,
then ``level_<i>`` is used.

* When using a ``DataFrame`` containing a ``Series`` backed by a user-defined
  ``ExtensionArray``, the generated JSON will contain an extra ``extDtype``
  key under the respective ``fields`` array element. While this key is not standard,
  it enables roundtripping for custom types (e.g. ``read_json(df.to_json(orient="table"), orient="table")``).
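The extra key can be inspected directly with the public ``build_table_schema`` helper (a sketch assuming pandas >= 1.4, shown with the registered nullable ``Int64`` dtype; the ``type`` value depends on the dtype):

```python
import pandas as pd
from pandas.io.json import build_table_schema

# A Series backed by an ExtensionArray gains an "extDtype" entry
# alongside the standard "name" and "type" keys of its field.
s = pd.Series(pd.array([1, 2], dtype="Int64"), name="a")
schema = build_table_schema(s, index=False, version=False)
print(schema["fields"][0])
```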

``read_json`` also accepts ``orient='table'`` as an argument. This allows for
the preservation of metadata such as dtypes and index names in a
1 change: 0 additions & 1 deletion doc/source/whatsnew/v1.3.5.rst
@@ -29,7 +29,6 @@ Fixed regressions
Bug fixes
~~~~~~~~~
-
-

.. ---------------------------------------------------------------------------

1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
@@ -229,6 +229,7 @@ Other enhancements
- :meth:`Series.info` has been added, for compatibility with :meth:`DataFrame.info` (:issue:`5167`)
- Implemented :meth:`IntervalArray.min`, :meth:`IntervalArray.max`, as a result of which ``min`` and ``max`` now work for :class:`IntervalIndex`, :class:`Series` and :class:`DataFrame` with ``IntervalDtype`` (:issue:`44746`)
- :meth:`UInt64Index.map` now retains ``dtype`` where possible (:issue:`44609`)
- :class:`ExtensionDtype` and :class:`ExtensionArray` are now (de)serialized when exporting a :class:`DataFrame` with :meth:`DataFrame.to_json` using ``orient='table'`` (:issue:`20612`, :issue:`44705`).
-


7 changes: 6 additions & 1 deletion pandas/core/generic.py
@@ -2444,6 +2444,11 @@ def to_json(
``orient='table'`` contains a 'pandas_version' field under 'schema'.
This stores the version of `pandas` used in the latest revision of the
schema.
When using columns backed by an :class:`ExtensionDtype`, the corresponding
schema fields carry an 'extDtype' entry. This entry stores the
:class:`ExtensionDtype` name and is used to resolve the correct dtype
during deserialization; the values themselves are rebuilt through the
matching :meth:`ExtensionArray._from_sequence` method.

Examples
--------
@@ -2567,7 +2572,7 @@ def to_json(
"primaryKey": [
"index"
],
"pandas_version": "0.20.0"
"pandas_version": "1.4.0"
}},
"data": [
{{
4 changes: 1 addition & 3 deletions pandas/io/json/_json.py
@@ -68,8 +68,6 @@
loads = json.loads
dumps = json.dumps

TABLE_SCHEMA_VERSION = "0.20.0"


# interface to/from
def to_json(
@@ -565,7 +563,7 @@ def read_json(
{{"name":"col 1","type":"string"}},\
{{"name":"col 2","type":"string"}}],\
"primaryKey":["index"],\
"pandas_version":"0.20.0"}},\
"pandas_version":"1.4.0"}},\
"data":[\
{{"index":"row 1","col 1":"a","col 2":"b"}},\
{{"index":"row 2","col 1":"c","col 2":"d"}}]\
14 changes: 12 additions & 2 deletions pandas/io/json/_table_schema.py
@@ -18,11 +18,13 @@
JSONSerializable,
)

from pandas.core.dtypes.base import _registry as registry
from pandas.core.dtypes.common import (
is_bool_dtype,
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_extension_array_dtype,
is_integer_dtype,
is_numeric_dtype,
is_period_dtype,
@@ -40,6 +42,8 @@

loads = json.loads

TABLE_SCHEMA_VERSION = "1.4.0"


def as_json_table_type(x: DtypeObj) -> str:
"""
@@ -83,6 +87,8 @@ def as_json_table_type(x: DtypeObj) -> str:
return "duration"
elif is_categorical_dtype(x):
return "any"
elif is_extension_array_dtype(x):
return "any"
elif is_string_dtype(x):
return "string"
else:
@@ -134,6 +140,8 @@ def convert_pandas_type_to_json_field(arr):
field["freq"] = dtype.freq.freqstr
elif is_datetime64tz_dtype(dtype):
field["tz"] = dtype.tz.zone
elif is_extension_array_dtype(dtype):
field["extDtype"] = dtype.name
return field


@@ -199,6 +207,8 @@ def convert_json_field_to_pandas_type(field):
return CategoricalDtype(
categories=field["constraints"]["enum"], ordered=field["ordered"]
)
elif "extDtype" in field:
return registry.find(field["extDtype"])
else:
return "object"
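The ``registry.find`` lookup used here can be sketched directly (``_registry`` is a private import, mirrored from the module above; pandas' own nullable dtypes are pre-registered under their ``name``):

```python
import pandas as pd
from pandas.core.dtypes.base import _registry as registry  # private, as imported above

# find() maps a registered dtype name back to an ExtensionDtype instance,
# which is exactly how an "extDtype" schema entry is resolved.
dtype = registry.find("Int64")
print(dtype)  # Int64
```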

@@ -257,7 +267,7 @@ def build_table_schema(
{'name': 'B', 'type': 'string'}, \
{'name': 'C', 'type': 'datetime'}], \
'primaryKey': ['idx'], \
'pandas_version': '0.20.0'}
'pandas_version': '1.4.0'}
"""
if index is True:
data = set_default_names(data)
@@ -291,7 +301,7 @@
schema["primaryKey"] = primary_key

if version:
schema["pandas_version"] = "0.20.0"
schema["pandas_version"] = TABLE_SCHEMA_VERSION
return schema


7 changes: 5 additions & 2 deletions pandas/tests/extension/decimal/array.py
@@ -67,8 +67,11 @@ class DecimalArray(OpsMixin, ExtensionScalarOpsMixin, ExtensionArray):

def __init__(self, values, dtype=None, copy=False, context=None):
for i, val in enumerate(values):
if is_float(val) and np.isnan(val):
values[i] = DecimalDtype.na_value
if is_float(val):
if np.isnan(val):
values[i] = DecimalDtype.na_value
else:
values[i] = DecimalDtype.type(val)
elif not isinstance(val, decimal.Decimal):
raise TypeError("All values must be of type " + str(decimal.Decimal))
values = np.asarray(values, dtype=object)
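The revised constructor loop can be sketched standalone (``coerce`` is a hypothetical helper, not part of pandas; it mirrors the new float-to-``Decimal`` promotion and the NaN handling):

```python
import decimal
import math

def coerce(values, na_value=decimal.Decimal("NaN")):
    # Floats are promoted to Decimal (NaN floats become the na_value);
    # anything that is not a float must already be a Decimal.
    out = []
    for val in values:
        if isinstance(val, float):
            out.append(na_value if math.isnan(val) else decimal.Decimal(val))
        elif not isinstance(val, decimal.Decimal):
            raise TypeError("All values must be of type " + str(decimal.Decimal))
        else:
            out.append(val)
    return out

print(coerce([1.5, decimal.Decimal("2")]))  # [Decimal('1.5'), Decimal('2')]
```

Previously only NaN floats were handled; other floats fell through to the ``TypeError`` branch.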