Skip to content

Commit

Permalink
Fix date parsing logic
Browse files Browse the repository at this point in the history
  • Loading branch information
hsyyid committed Jan 31, 2025
1 parent e631985 commit b77bd6d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
13 changes: 7 additions & 6 deletions gluestick/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,16 @@ def get(self, stream, default=None, catalog_types=False, **kwargs):
types_params = self.get_types_from_catalog(catalog, stream)
kwargs.update(types_params)
df = pd.read_csv(filepath, **kwargs)

+ # needed to handle chunked CSVs properly
+ if isinstance(df, TextFileReader):
+ return df, kwargs.get("parse_dates", [])

# if a date field value is empty read_csv will read it as "object"
# make sure all date fields are typed as date
for date_col in kwargs.get("parse_dates", []):
- # needed to handle chunked CSVs properly
- if isinstance(df, TextFileReader):
- for d in df:
- d[date_col] = pd.to_datetime(d[date_col], errors='coerce')
- else:
- df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
+ df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

return df

def get_metadata(self, stream):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="gluestick",
- version="2.2.4",
+ version="2.2.5",
description="ETL utility functions built on Pandas",
long_description=long_description,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit b77bd6d

Please sign in to comment.