Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: Additional Granular values #24

Merged
merged 3 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
17 changes: 17 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,23 @@ class GeographySettings(BaseSettings):
}


class OtherSettings(BaseSettings):
    """Keyword patterns and display labels for the extra granular categories.

    Covers the categories that are neither datetime nor geography:
    airline, airport, language, crop and gender.
    """

    # Regex fragments, one per category. In app/utils/columns_mapping.py
    # (find_other_granular_columns) each one is wrapped as r".*({})" and
    # compiled before being matched against column names.
    # NOTE(review): `[_names]{0,}` is a character class -- it matches any run
    # of the characters `_`, `n`, `a`, `m`, `e`, `s` -- not an optional literal
    # "_names" suffix. Confirm that this is what was intended.
    AIRLINE_KEYWORD = ".*airline[s]*[_names]{0,}"
    AIRPORT_KEYWORD = ".*airport[s]*[_names]{0,}"
    LANGUAGE_KEYWORD = ".*language.*"
    CROPS_KEYWORD = ".*crop[s]*[_names]{0,}"
    GENDER_KEYWORD = ".*gender.*"

    # Display label for each category key. get_granularity
    # (app/utils/granularity.py) indexes this mapping with the keys of the
    # dict returned by find_other_granular_columns, so the two key sets must
    # stay in sync.
    GRANULARITY_REPRESENTATION = {
        "airline": "Airline",
        "airport": "Airport",
        "language": "Language",
        "crop": "Crop",
        "gender": "Gender",
    }


class UnitSettings(BaseSettings):

UNIT_KEYWORD = "unit"
Expand Down
45 changes: 42 additions & 3 deletions app/utils/columns_mapping.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import re
from itertools import chain

from fastapi.logger import logger

from app.core.config import (
DateTimeSettings,
GeographySettings,
NoteSettings,
OtherSettings,
UnitSettings,
)

datetime_settings = DateTimeSettings()
geography_settings = GeographySettings()
unit_settings = UnitSettings()
note_settings = NoteSettings()
other_settings = OtherSettings()


def extract_pattern_from_columns(
Expand All @@ -24,6 +24,44 @@ def extract_pattern_from_columns(
return matched_columns, columns.difference(matched_columns)


async def find_other_granular_columns(columns: set):
    """Sort column names into the airline/airport/language/crop/gender buckets.

    Categories are tried in the fixed order airline, airport, language,
    crop, gender. Columns claimed by one category are removed from the pool
    before the next category is tried, so a column is reported under at most
    one category (the first one whose pattern it satisfies).

    Args:
        columns: Column names to classify. The caller's set is not mutated;
            only the local reference is rebound to the remaining columns.

    Returns:
        A dict with the keys "airline", "airport", "language", "crop" and
        "gender" (in that order), each mapped to the columns that
        ``extract_pattern_from_columns`` matched for that category.
    """
    # Ordered category -> keyword table. Keeping every pairing in one place
    # makes a mismatch obvious: the gender pattern used to be built from
    # AIRPORT_KEYWORD, which meant "gender" could never match anything
    # because airport columns had already been removed from the pool.
    keyword_by_category = {
        "airline": other_settings.AIRLINE_KEYWORD,
        "airport": other_settings.AIRPORT_KEYWORD,
        "language": other_settings.LANGUAGE_KEYWORD,
        "crop": other_settings.CROPS_KEYWORD,
        "gender": other_settings.GENDER_KEYWORD,
    }

    granular_columns = {}
    remaining = columns
    for category, keyword in keyword_by_category.items():
        pattern = re.compile(r".*({})".format(keyword))
        # The helper returns (matched, leftover); only the leftover columns
        # are offered to the next category.
        granular_columns[category], remaining = extract_pattern_from_columns(
            remaining, pattern
        )

    return granular_columns


async def find_datetime_columns(columns: set):
non_cal_year_pattern = re.compile(
r".*({}|{})".format(
Expand Down Expand Up @@ -64,7 +102,7 @@ async def find_datetime_columns(columns: set):
columns, month_pattern
)
date_columns, columns = extract_pattern_from_columns(columns, date_pattern)
logger.info(f"date_columns: {date_columns}")

# filter out `as_on_date` from date columns
date_columns = {
col for col in date_columns if not as_on_date_pattern.match(col)
Expand Down Expand Up @@ -145,4 +183,5 @@ async def find_mapped_columns(columns):
list(chain.from_iterable(mapped_columns.values()))
)
)

return {**mapped_columns, "unmapped": not_mapped_columns}
16 changes: 15 additions & 1 deletion app/utils/granularity.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
from app.core.config import DateTimeSettings, GeographySettings
from app.core.config import DateTimeSettings, GeographySettings, OtherSettings
from app.utils.columns_mapping import (
find_datetime_columns,
find_geography_columns,
find_other_granular_columns,
)
from app.utils.common import get_key_from_dict

datetime_settings = DateTimeSettings()
geographic_settings = GeographySettings()
other_settings = OtherSettings()


async def get_granularity(columns):
datetime_columns = await find_datetime_columns(columns)
geographic_columns = await find_geography_columns(columns)
other_granular_columns = await find_other_granular_columns(columns)

datetime_columns = {
key: value for key, value in datetime_columns.items() if value
}
geographic_columns = {
key: value for key, value in geographic_columns.items() if value
}
other_granular_columns = {
key: value for key, value in other_granular_columns.items() if value
}

sorted_datetime_columns = sorted(
datetime_columns.items(),
Expand Down Expand Up @@ -48,4 +54,12 @@ async def get_granularity(columns):
]
)

if len(other_granular_columns) > 0:
granularity_values.extend(
[
other_settings.GRANULARITY_REPRESENTATION[key]
for key in other_granular_columns.keys()
]
)

return {"granularity": ", ".join(granularity_values)}
2 changes: 1 addition & 1 deletion app/utils/temporal_coverage.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,5 +165,5 @@ async def get_temporal_coverage(dataset, mapped_columns: dict):
# temporal_coverage = temporal_coverage_representation(
# year_in_sequence, year_mapping
# )
logger.warning(f"Temporal Coverage: {temporal_coverage}")

return {"temporal_coverage": temporal_coverage}