Skip to content

Commit

Permalink
fix: missing cells and linter
Browse files Browse the repository at this point in the history
  • Loading branch information
agistyaanugrah committed Feb 29, 2024
1 parent bb117b3 commit 9973ee5
Showing 1 changed file with 30 additions and 11 deletions.
41 changes: 30 additions & 11 deletions datasae/profiling/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def check_head_and_tail(data: list) -> dict:
return head, tail

@staticmethod
def check_number_of_variables(columns: list) -> int:
def check_number_of_variables(data: list) -> int:
"""
Generate the number of variables in a given list of columns.
Expand All @@ -87,7 +87,7 @@ def check_number_of_variables(columns: list) -> int:
Returns:
int: An integer containing the total number of columns.
"""
count = len(columns)
count = len(data[0].keys())
return count

@staticmethod
Expand All @@ -104,19 +104,20 @@ def check_missing_cells(data: list) -> int:
missing_cells = 0
for row in data:
value = list(row.values())
value = [a.strip() if isinstance(a, str) else a for a in value]
value = [
a.strip() if isinstance(a, str) else a for a in value
]
value = [
""
if (isinstance(a, float) or isinstance(a, int))
and str(a).lower() == "nan"
else a
(
""
if (isinstance(a, float) or isinstance(a, int))
and str(a).lower() == "nan"
else a
)
for a in value
]

missing_cells += len(value.index(None))
missing_cells += len(value.index(""))
missing_cells += sum(
1 if r == "" or r is None else 0 for r in value
)

return missing_cells

Expand Down Expand Up @@ -147,3 +148,21 @@ def check_characters_and_unicode(data: list) -> dict:
"characters": total_characters,
"unicode": len(characters),
}

def profiling(self):
    """
    Build a profiling summary for ``self.dataFrame``.

    The frame is converted to a list of row dictionaries with
    ``to_dict(orient="records")`` and that list is passed to each of
    the ``check_*`` helpers.

    Returns:
        dict: A dictionary with two sections. ``"overview"`` holds
            ``number_of_observations``, ``number_of_variables``,
            ``missing_cells`` and ``duplicate_rows``; ``"sample"``
            holds ``head`` and ``tail``.
    """
    data = self.dataFrame.to_dict(orient="records")

    overview = {
        "number_of_observations": self.check_number_of_observations(data),
        "number_of_variables": self.check_number_of_variables(data),
        "missing_cells": self.check_missing_cells(data),
        "duplicate_rows": self.check_duplicate_rows(data),
    }

    # The helper yields the head and the tail together, so call it once
    # and unpack instead of recomputing it for each key.
    head, tail = self.check_head_and_tail(data)

    return {
        "overview": overview,
        "sample": {
            "head": head,
            "tail": tail,
        },
    }

0 comments on commit 9973ee5

Please sign in to comment.