diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c1cea41..3be04918 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Bug Fixes 1. [#375](https://github.com/influxdata/influxdb-client-python/pull/375): Construct `InfluxDBError` without HTTP response +1. [#378](https://github.com/influxdata/influxdb-client-python/pull/378): Correct serialization of DataFrame with NaN values [DataFrame] ### CI 1. [#370](https://github.com/influxdata/influxdb-client-python/pull/370): Add Python 3.10 to CI builds diff --git a/influxdb_client/client/write/dataframe_serializer.py b/influxdb_client/client/write/dataframe_serializer.py index 4d83e8d4..95ad09b0 100644 --- a/influxdb_client/client/write/dataframe_serializer.py +++ b/influxdb_client/client/write/dataframe_serializer.py @@ -27,9 +27,6 @@ def _any_not_nan(p, indexes): return any(map(lambda x: _not_nan(p[x]), indexes)) -_EMPTY_EXPRESSION = "_EMPTY_LINE_PROTOCOL_PART_" - - class DataframeSerializer: """Serialize DataFrame into LineProtocols.""" @@ -180,15 +177,13 @@ def __init__(self, data_frame, point_settings, precision=DEFAULT_WRITE_PRECISION field_value = f'{sep}{key_format}={{{val_format}}}' elif issubclass(value.type, np.floating): if null_columns[index]: - field_value = f"""{{ - "{sep}{_EMPTY_EXPRESSION}" if math.isnan({val_format}) else f"{sep}{key_format}={{{val_format}}}" - }}""" + field_value = f"""{{"" if math.isnan({val_format}) else f"{sep}{key_format}={{{val_format}}}"}}""" else: field_value = f'{sep}{key_format}={{{val_format}}}' else: if null_columns[index]: field_value = f"""{{ - '{sep}{_EMPTY_EXPRESSION}' if type({val_format}) == float and math.isnan({val_format}) else + '' if type({val_format}) == float and math.isnan({val_format}) else f'{sep}{key_format}="{{str({val_format}).translate(_ESCAPE_STRING)}}"' }}""" else: @@ -249,7 +244,7 @@ def serialize(self, chunk_idx: int = None): if self.first_field_maybe_null: # When the first field is null (None/NaN), we'll have # a spurious leading comma which 
needs to be removed. - lp = (re.sub(f",{_EMPTY_EXPRESSION}|{_EMPTY_EXPRESSION},|{_EMPTY_EXPRESSION}", '', self.f(p)) + lp = (re.sub('^(( |[^ ])* ),([a-zA-Z])(.*)', '\\1\\3\\4', self.f(p)) for p in filter(lambda x: _any_not_nan(x, self.field_indexes), _itertuples(chunk))) return list(lp) else: diff --git a/tests/test_WriteApiDataFrame.py b/tests/test_WriteApiDataFrame.py index ab1feb29..3e2d91cc 100644 --- a/tests/test_WriteApiDataFrame.py +++ b/tests/test_WriteApiDataFrame.py @@ -396,6 +396,23 @@ def test_serialize_strings_with_commas(self): self.assertEqual("bookings,Account=Testaccount,Category=Testcategory,Entry\\ Type=Expense Currencs=\"EUR\",Note=\"This, works\",Recurring=\"no\",Value=-1.0 1538352000000000000", points[0]) self.assertEqual("bookings,Account=Testaccount,Category=Testcategory,Entry\\ Type=Expense Currencs=\"EUR\",Note=\"This , works not\",Recurring=\"no\",Value=-1.0 1538438400000000000", points[1]) + def test_without_tags_and_fields_with_nan(self): + from influxdb_client.extras import pd, np + + df = pd.DataFrame({ + 'a': np.arange(0., 3.), + 'b': [0., np.nan, 1.], + }).set_index(pd.to_datetime(['2021-01-01 0:00', '2021-01-01 0:01', '2021-01-01 0:02'])) + + points = data_frame_to_list_of_points(data_frame=df, + data_frame_measurement_name="test", + point_settings=PointSettings()) + + self.assertEqual(3, len(points)) + self.assertEqual("test a=0.0,b=0.0 1609459200000000000", points[0]) + self.assertEqual("test a=1.0 1609459260000000000", points[1]) + self.assertEqual("test a=2.0,b=1.0 1609459320000000000", points[2]) + class DataSerializerChunksTest(unittest.TestCase): def test_chunks(self):