Skip to content

Commit

Permalink
fix: character function
Browse files Browse the repository at this point in the history
  • Loading branch information
agistyaanugrah committed Mar 5, 2024
1 parent 8b99f97 commit 2bf538d
Showing 1 changed file with 13 additions and 15 deletions.
28 changes: 13 additions & 15 deletions datasae/profiling/profiling.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def check_missing_cells(data: list) -> int:
return missing_cells

@staticmethod
def check_characters_and_unicode(data: list) -> dict:
def check_characters(data: list) -> dict:
"""
Check total characters and unicode of list of data.
Expand All @@ -136,21 +136,18 @@ def check_characters_and_unicode(data: list) -> dict:
"""
total_characters = 0
characters = ""
for row in data:
values = list(row.values())
for value in values:
if isinstance(value, str):
value = re.sub(r"[^a-zA-Z]", "", value)
total_characters += len(value)
characters += value

characters = "".join(set(characters))

return {
"characters": total_characters,
"unicode": len(characters),
for value in data:
value = re.sub(r"[^a-zA-Z]", "", value)
total_characters += len(value)
characters += value

result = {
"total_characters": sum(len(i) for i in data),
"distinct_characters": len("".join(set(characters))),
}

return result

@staticmethod
def check_data_types(data: list) -> dict:
"""
Expand Down Expand Up @@ -313,7 +310,7 @@ def check_coeff_var(data: list) -> float:

def profiling(self):
data = self.dataFrame.to_dict(orient="records")
data2 = self.dataFrame.to_dict()
data2 = self.dataFrame.to_dict(orient="list")
result = {
"overview": {
"number_of_observations": self.check_number_of_observations(
Expand Down Expand Up @@ -353,5 +350,6 @@ def profiling(self):
if value == "Text":
result["variables"][key] = {
"data_type": value,
"characters": self.check_characters(data2[key])
}
return result

0 comments on commit 2bf538d

Please sign in to comment.