Skip to content

Commit

Permalink
feat: Convert between Excel file and Table (#233)
Browse files Browse the repository at this point in the history
Closes #138.
Closes #139.

### Summary of Changes
Added the methods `to_excel_file()` and `from_excel_file()` to the class
`Table`.

---------

Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
Co-authored-by: patrikguempel <patrikguempel@gmail.com>
Co-authored-by: Lars Reimann <mail@larsreimann.com>
Co-authored-by: patrikguempel <128832338+patrikguempel@users.noreply.github.com>
  • Loading branch information
5 people authored May 5, 2023
1 parent 4bc4c09 commit 0d7a998
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 2 deletions.
31 changes: 29 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pandas = "^2.0.0"
pillow = "^9.5.0"
scikit-learn = "^1.2.0"
seaborn = "^0.12.2"
openpyxl = "^3.1.2"

[tool.poetry.group.dev.dependencies]
pytest = "^7.2.1"
Expand Down
49 changes: 49 additions & 0 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import pandas as pd
import seaborn as sns
from pandas import DataFrame
Expand Down Expand Up @@ -84,6 +85,33 @@ def from_csv_file(path: str | Path) -> Table:
except FileNotFoundError as exception:
raise FileNotFoundError(f'File "{path}" does not exist') from exception

@staticmethod
def from_excel_file(path: str | Path) -> Table:
    """
    Read data from an Excel file into a table.

    Columns whose label contains "Unnamed" are skipped. pandas assigns labels of the form "Unnamed: <n>" to columns
    without a header cell, so this drops header-less columns. Note that a column whose real header contains the
    text "Unnamed" is dropped as well.

    Parameters
    ----------
    path : str | Path
        The path to the Excel file.

    Returns
    -------
    table : Table
        The table created from the Excel file.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    ValueError
        If the file could not be read.
    """

    def _is_named_column(column_label: object) -> bool:
        # Header cells can hold numbers or dates, so the label is not guaranteed to be a str. Normalize it before
        # the substring test; `"Unnamed" in 42` would raise a TypeError.
        return "Unnamed" not in str(column_label)

    try:
        return Table(pd.read_excel(path, engine="openpyxl", usecols=_is_named_column))
    except FileNotFoundError as exception:
        raise FileNotFoundError(f'File "{path}" does not exist') from exception

@staticmethod
def from_json_file(path: str | Path) -> Table:
"""
Expand Down Expand Up @@ -1242,6 +1270,27 @@ def to_csv_file(self, path: str | Path) -> None:
data_to_csv.columns = self._schema.column_names
data_to_csv.to_csv(path, index=False)

def to_excel_file(self, path: str | Path) -> None:
    """
    Write the data from the table into an Excel file.

    If the file and/or the directories do not exist, they will be created. If the file already exists, it will be
    overwritten. Only the table's columns are written; the row index of the underlying DataFrame is omitted.

    Parameters
    ----------
    path : str | Path
        The path to the output file.
    """
    # The parent directories have to exist before anything is written to the path.
    Path(path).parent.mkdir(parents=True, exist_ok=True)

    data_to_excel = self._data.copy()
    data_to_excel.columns = self._schema.column_names
    # Omit the row index, as to_csv_file does; otherwise it ends up in the sheet as an extra header-less column.
    data_to_excel.to_excel(path, index=False)

def to_json_file(self, path: str | Path) -> None:
"""
Write the data from the table into a JSON file.
Expand Down
Binary file added tests/resources/dummy_excel_file.xlsx
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from pathlib import Path

import pytest
from safeds.data.tabular.containers import Table

from tests.helpers import resolve_resource_path


@pytest.mark.parametrize(
    ("path", "expected"),
    [
        (
            resolve_resource_path("./dummy_excel_file.xlsx"),
            Table.from_dict({"A": [1], "B": [2]}),
        ),
        (
            Path(resolve_resource_path("./dummy_excel_file.xlsx")),
            Table.from_dict({"A": [1], "B": [2]}),
        ),
    ],
    ids=["string path", "object path"],
)
def test_should_create_table_from_excel_file(path: str | Path, expected: Table) -> None:
    """Check that the Excel fixture is read into the expected table for each parametrized path."""
    loaded_table = Table.from_excel_file(path)
    assert loaded_table == expected


def test_should_raise_if_file_not_found() -> None:
    """Check that reading a file that is expected to be absent raises a FileNotFoundError."""
    with pytest.raises(FileNotFoundError):
        # The lookup stays inside the `raises` block so that an error from either call is covered.
        invalid_path = resolve_resource_path("test_table_from_excel_file_invalid.xls")
        Table.from_excel_file(invalid_path)
26 changes: 26 additions & 0 deletions tests/safeds/data/tabular/containers/_table/test_to_excel_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pathlib import Path
from tempfile import NamedTemporaryFile

from safeds.data.tabular.containers import Table


def test_should_create_csv_file_from_table_by_str() -> None:
    """
    Round-trip a table through an Excel file that is addressed by a str path.

    Despite "csv" in its name, this test exercises `to_excel_file` and `from_excel_file`.
    """
    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})

    # Only reserve a unique file name here. With delete=False the file survives closing the handle, so it can be
    # written and read by path afterwards without a second handle being open at the same time.
    with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_table_file:
        tmp_path = Path(tmp_table_file.name)

    try:
        table.to_excel_file(str(tmp_path))
        table_r = Table.from_excel_file(str(tmp_path))
    finally:
        # Automatic deletion is disabled above, so remove the file explicitly to avoid leaking temp files.
        tmp_path.unlink(missing_ok=True)

    assert table == table_r


def test_should_create_csv_file_from_table_by_path() -> None:
    """
    Round-trip a table through an Excel file that is addressed by a Path object.

    Despite "csv" in its name, this test exercises `to_excel_file` and `from_excel_file`.
    """
    table = Table.from_dict({"col1": ["col1_1"], "col2": ["col2_1"]})

    # Only reserve a unique file name here. With delete=False the file survives closing the handle, so it can be
    # written and read by path afterwards without a second handle being open at the same time.
    with NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp_table_file:
        tmp_path = Path(tmp_table_file.name)

    try:
        table.to_excel_file(tmp_path)
        table_r = Table.from_excel_file(tmp_path)
    finally:
        # Automatic deletion is disabled above, so remove the file explicitly to avoid leaking temp files.
        tmp_path.unlink(missing_ok=True)

    assert table == table_r

0 comments on commit 0d7a998

Please sign in to comment.