diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ebe951b..993483b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@
 1. [#335](https://github.com/influxdata/influxdb-client-python/pull/335): Add support for custom precision for index specified as number [DataFrame]
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): Add support for handling batch events
 
+### Bug Fixes
+1. [#348](https://github.com/influxdata/influxdb-client-python/pull/348): Optimize appending new columns to Pandas DataFrame [DataFrame]
+
 ### Documentation
 1. [#331](https://github.com/influxdata/influxdb-client-python/pull/331): Add [Migration Guide](MIGRATION_GUIDE.rst)
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): How to handle client errors
diff --git a/influxdb_client/client/flux_csv_parser.py b/influxdb_client/client/flux_csv_parser.py
index 00ae04d9..4e7e3d27 100644
--- a/influxdb_client/client/flux_csv_parser.py
+++ b/influxdb_client/client/flux_csv_parser.py
@@ -133,9 +133,8 @@ def _parse_flux_response(self):
 
                 # Create DataFrame with default values
                 if self._serialization_mode is FluxSerializationMode.dataFrame:
                     from ..extras import pd
-                    self._data_frame = pd.DataFrame(data=[], columns=[], index=None)
-                    for column in table.columns:
-                        self._data_frame[column.label] = column.default_value
+                    labels = list(map(lambda it: it.label, table.columns))
+                    self._data_frame = pd.DataFrame(data=[], columns=labels, index=None)
                     pass
                 continue
diff --git a/tests/test_FluxCSVParser.py b/tests/test_FluxCSVParser.py
index 3112f0a9..f72d6933 100644
--- a/tests/test_FluxCSVParser.py
+++ b/tests/test_FluxCSVParser.py
@@ -212,11 +212,38 @@ def test_to_json(self):
         import json
         self.assertEqual(query_output, json.dumps(tables, cls=FluxStructureEncoder, indent=2))
 
+    def test_pandas_lot_of_columns(self):
+        data_types = ""
+        groups = ""
+        defaults = ""
+        columns = ""
+        values = ""
+        for i in range(0, 200):
+            data_types += f",long"
+            groups += f",false"
+            defaults += f","
+            columns += f",column_{i}"
+            values += f",{i}"
+
+        data = f"#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string{data_types}\n" \
+               f"#group,false,false,true,true,true,true,false,false,true{groups}\n" \
+               f"#default,_result,,,,,,,,{defaults}\n" \
+               f",result,table,_field,_measurement,_start,_stop,_time,_value,tag{columns}\n" \
+               f",,0,value,python_client_test,2010-02-27T04:48:32.752600083Z,2020-02-27T16:48:32.752600083Z,2020-02-27T16:20:00Z,2,test1{values}\n" \
+
+        parser = self._parse(data=data, serialization_mode=FluxSerializationMode.dataFrame)
+        _dataFrames = list(parser.generator())
+        self.assertEqual(1, _dataFrames.__len__())
+
     @staticmethod
-    def _parse_to_tables(data: str):
-        fp = BytesIO(str.encode(data))
-        _parser = FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
-                                serialization_mode=FluxSerializationMode.tables)
+    def _parse_to_tables(data: str, serialization_mode=FluxSerializationMode.tables):
+        _parser = FluxCsvParserTest._parse(data, serialization_mode)
         list(_parser.generator())
         tables = _parser.tables
         return tables
+
+    @staticmethod
+    def _parse(data, serialization_mode):
+        fp = BytesIO(str.encode(data))
+        return FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
+                             serialization_mode=serialization_mode)