Skip to content

Commit

Permalink
Merge pull request #35 from factly/fix/decimals-in-samples
Browse files Browse the repository at this point in the history
Set infer_schema_length=None (instead of 0) on read_csv so polars infers the correct column datatypes
  • Loading branch information
paul-tharun authored Mar 28, 2024
2 parents d0f0f87 + 6b95bdf commit f93a31f
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 5 deletions.
8 changes: 4 additions & 4 deletions app/utils/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async def get_dataframe_honouring_encoding_async(
pl.DataFrame: polars Dataframe object
"""
try:
df = pl.read_csv(source, null_values="NA", infer_schema_length=0)
df = pl.read_csv(source, null_values="NA", infer_schema_length=None)
except (UnicodeDecodeError, pl_exc.ComputeError) as err:
logger.warning(f"File encoding is not default: {err}")
logger.warning("Trying to read file with proper encoding")
Expand All @@ -59,7 +59,7 @@ async def get_dataframe_honouring_encoding_async(
source,
null_values="NA",
encoding=encoding,
infer_schema_length=0,
infer_schema_length=None,
)
return df

Expand All @@ -78,7 +78,7 @@ def get_dataframe_honouring_encoding(
pl.DataFrame: polars Dataframe object
"""
try:
df = pl.read_csv(source, null_values="NA", infer_schema_length=0)
df = pl.read_csv(source, null_values="NA", infer_schema_length=None)
except (UnicodeDecodeError, pl_exc.ComputeError) as err:
logger.error(f"Could not interpret File encoding : {err}")
encoding = get_encoding(obj=source, is_object=is_object)
Expand All @@ -87,7 +87,7 @@ def get_dataframe_honouring_encoding(
source,
null_values="NA",
encoding=encoding,
infer_schema_length=0,
infer_schema_length=None,
)
return df

Expand Down
8 changes: 7 additions & 1 deletion app/utils/profile_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from numpy import bool_
from pandas import DataFrame
from pydantic import parse_obj_as
from ydata_profiling import ProfileReport

from app.core.config import Settings
from app.models.analysis import Analysis
Expand Down Expand Up @@ -38,7 +39,9 @@ def json_conversion_objects(obj):


class ProfileSegments:
def __init__(self, pandas_profile, columns=None, round_to=3):
def __init__(
self, pandas_profile: ProfileReport, columns=None, round_to=3
):
"""
Pass pandas profile of a dataset as argument
"""
Expand Down Expand Up @@ -93,6 +96,9 @@ def package(self) -> Dict:
def samples(self) -> List[Sample]:
# get samples
samples = self.profile_description.sample
import logging

logging.error(samples)
for sample in samples:
sample.data = sample.data.round(decimals=self.round_to).to_json()
# * 'head' and 'tail' are returned as dataset sample
Expand Down

0 comments on commit f93a31f

Please sign in to comment.