Skip to content

Commit

Permalink
fix: Issue #2 Add support for merge on columns with numeric ID
Browse files Browse the repository at this point in the history
  • Loading branch information
fburic committed Jan 2, 2024
1 parent 585aa11 commit cceea61
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 4 deletions.
8 changes: 4 additions & 4 deletions grove/grove.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,11 +355,11 @@ def dataframe_list(self):
return sorted(self._data_frames.keys())


def merge(df_list: list, on: Union[str, list] = None) -> pd.DataFrame:
def merge(df_list: list, on: Union[str, int, list] = None) -> pd.DataFrame:
"""
Merge multiple DataFrames.
This module-level function allows more flexibility in passing DataFrame
while doing any on-the-fly operations.
This module-level function allows more flexibility in passing DataFrames
while doing any on-the-fly operations, outside a Collection.
Example
-------
Expand Down Expand Up @@ -435,7 +435,7 @@ def merge(df_list: list, on: Union[str, list] = None) -> pd.DataFrame:
# Normal form is [[ids, ids], [ids, ids], ... ]
if on is None:
id_list = [[None, None] for _ in range(len(df_list) - 1)]
elif isinstance(on, str):
elif isinstance(on, str) or isinstance(on, int):
id_list = [[on, on] for _ in range(len(df_list) - 1)]
elif isinstance(on, list):
id_list = on
Expand Down
21 changes: 21 additions & 0 deletions test/test_bugs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# Tests for bugs
from contextlib import redirect_stdout
import io

import grove
import pandas as pd


def test_issue_1_info_pandas_2():
Expand All @@ -23,3 +25,22 @@ def test_issue_1_info_pandas_2():
assert 'categories' in grove_print
assert 'measurements' in grove_print
assert 'TOTAL' in grove_print


def test_issue_2_numerical_col_names():
    """Regression test for issue #2: merge on an integer column label.

    Builds two header-less DataFrames, merges them through a
    ``grove.Collection`` with ``on=0`` and checks the outcome against a
    direct ``pd.merge`` call on the same frames.
    """
    left = pd.DataFrame([('a', 1), ['b', 2]])
    right = pd.DataFrame([('x', 1), ['b', 3]])
    frames = {'A': left, 'B': right}
    collection = grove.Collection(frames)

    merged = collection.merge(['A', 'B'], on=0)
    reference = pd.merge(left, right, on=0)

    # The merge must produce at least one row ...
    n_rows = merged.shape[0]
    assert n_rows > 0
    # ... and show no differences from the plain pandas result.
    differences = merged.compare(reference)
    assert differences.empty
17 changes: 17 additions & 0 deletions test/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import io

import grove
import pandas as pd


def test_create_collection_from_spec():
Expand All @@ -22,6 +23,22 @@ def test_create_collection_from_spec():
assert data['measurements'].shape == (15, 3)


def test_create_collection_from_data():
    """A Collection built from a dict of two DataFrames reports two entries."""
    first = pd.DataFrame([('a', 1), ['b', 2]])
    second = pd.DataFrame([('x', 1), ['b', 3]])
    collection = grove.Collection({'A': first, 'B': second})

    names = collection.dataframe_list
    assert len(names) == 2


def test_collection_inspection():
data = grove.Collection(
[('items', 'test/data/items.csv'),
Expand Down

0 comments on commit cceea61

Please sign in to comment.