chore: optimize appending new columns to Pandas DataFrame (#348)
bednar authored Oct 20, 2021
1 parent 242444d commit d284254
Showing 3 changed files with 36 additions and 7 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,9 @@
 1. [#335](https://github.com/influxdata/influxdb-client-python/pull/335): Add support for custom precision for index specified as number [DataFrame]
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): Add support for handling batch events
 
+### Bug Fixes
+1. [#348](https://github.com/influxdata/influxdb-client-python/pull/348): Optimize appending new columns to Pandas DataFrame [DataFrame]
+
 ### Documentation
 1. [#331](https://github.com/influxdata/influxdb-client-python/pull/331): Add [Migration Guide](MIGRATION_GUIDE.rst)
 1. [#341](https://github.com/influxdata/influxdb-client-python/pull/341): How to handle client errors
5 changes: 2 additions & 3 deletions influxdb_client/client/flux_csv_parser.py
@@ -133,9 +133,8 @@ def _parse_flux_response(self):
 # Create DataFrame with default values
 if self._serialization_mode is FluxSerializationMode.dataFrame:
     from ..extras import pd
-    self._data_frame = pd.DataFrame(data=[], columns=[], index=None)
-    for column in table.columns:
-        self._data_frame[column.label] = column.default_value
+    labels = list(map(lambda it: it.label, table.columns))
+    self._data_frame = pd.DataFrame(data=[], columns=labels, index=None)
     pass
 continue

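The hunk above replaces per-column assignment with a single DataFrame construction. A minimal standalone sketch (not part of the commit, assuming only pandas; the 200-column width mirrors the new test below) contrasting the two patterns:

import pandas as pd

labels = [f"column_{i}" for i in range(200)]

# Old pattern: start with an empty frame and insert each column separately.
# Every assignment goes through the block manager, which is slow for wide
# frames and can trigger pandas' DataFrame-fragmentation performance warning.
df_slow = pd.DataFrame(data=[], columns=[], index=None)
for label in labels:
    df_slow[label] = None

# New pattern: collect all column labels first and build the frame once.
df_fast = pd.DataFrame(data=[], columns=labels, index=None)

# Both approaches produce the same column layout.
assert list(df_slow.columns) == list(df_fast.columns)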
35 changes: 31 additions & 4 deletions tests/test_FluxCSVParser.py
@@ -212,11 +212,38 @@ def test_to_json(self):
         import json
         self.assertEqual(query_output, json.dumps(tables, cls=FluxStructureEncoder, indent=2))
 
+    def test_pandas_lot_of_columns(self):
+        data_types = ""
+        groups = ""
+        defaults = ""
+        columns = ""
+        values = ""
+        for i in range(0, 200):
+            data_types += f",long"
+            groups += f",false"
+            defaults += f","
+            columns += f",column_{i}"
+            values += f",{i}"
+
+        data = f"#datatype,string,long,string,string,dateTime:RFC3339,dateTime:RFC3339,dateTime:RFC3339,double,string{data_types}\n" \
+               f"#group,false,false,true,true,true,true,false,false,true{groups}\n" \
+               f"#default,_result,,,,,,,,{defaults}\n" \
+               f",result,table,_field,_measurement,_start,_stop,_time,_value,tag{columns}\n" \
+               f",,0,value,python_client_test,2010-02-27T04:48:32.752600083Z,2020-02-27T16:48:32.752600083Z,2020-02-27T16:20:00Z,2,test1{values}\n" \
+
+        parser = self._parse(data=data, serialization_mode=FluxSerializationMode.dataFrame)
+        _dataFrames = list(parser.generator())
+        self.assertEqual(1, _dataFrames.__len__())
+
     @staticmethod
-    def _parse_to_tables(data: str):
-        fp = BytesIO(str.encode(data))
-        _parser = FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
-                                serialization_mode=FluxSerializationMode.tables)
+    def _parse_to_tables(data: str, serialization_mode=FluxSerializationMode.tables):
+        _parser = FluxCsvParserTest._parse(data, serialization_mode)
         list(_parser.generator())
         tables = _parser.tables
         return tables
+
+    @staticmethod
+    def _parse(data, serialization_mode):
+        fp = BytesIO(str.encode(data))
+        return FluxCsvParser(response=HTTPResponse(fp, preload_content=False),
+                             serialization_mode=serialization_mode)
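For context (not part of this commit), a hedged sketch of how the DataFrame serialization path exercised by the new test is typically reached from client code; the URL, token, org, and bucket values are placeholders:

from influxdb_client import InfluxDBClient

# Placeholder connection settings (hypothetical URL/token/org/bucket).
client = InfluxDBClient(url="http://localhost:8086", token="my-token", org="my-org")
try:
    # query_data_frame() drives the DataFrame serialization mode touched above;
    # it may return a single DataFrame or a list of them, one per result frame.
    df = client.query_api().query_data_frame('from(bucket: "my-bucket") |> range(start: -1h)')
    print(df)
finally:
    client.close()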
