Skip to content

Commit

Permalink
Modify add_column() to optionally accept a FeatureType param
Browse files (browse the repository at this point in the history)
  • Loading branch information
Varad Bhatnagar committed Sep 10, 2024
1 parent ca58154 commit 383a18e
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions src/datasets/arrow_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5613,7 +5613,9 @@ def push_to_hub(

@transmit_format
@fingerprint_transform(inplace=False)
def add_column(self, name: str, column: Union[list, np.array], new_fingerprint: str):
def add_column(
self, name: str, column: Union[list, np.array], new_fingerprint: str, feature: Optional[FeatureType] = None
):
"""Add column to Dataset.
<Added version="1.7"/>
Expand All @@ -5640,7 +5642,13 @@ def add_column(self, name: str, column: Union[list, np.array], new_fingerprint:
})
```
"""
column_table = InMemoryTable.from_pydict({name: column})

if feature:
pyarrow_schema = Features({name: feature}).arrow_schema
else:
pyarrow_schema = None

column_table = InMemoryTable.from_pydict({name: column}, schema=pyarrow_schema)
_check_column_names(self._data.column_names + column_table.column_names)
dataset = self.flatten_indices() if self._indices is not None else self
# Concatenate tables horizontally
Expand Down

0 comments on commit 383a18e

Please sign in to comment.