Skip to content

Commit

Permalink
Merge pull request #68 from OCHA-DAP/hapi_328_fix_db_export
Browse files Browse the repository at this point in the history
HAPI-328 fix db export
  • Loading branch information
mcarans authored Jan 7, 2024
2 parents 777f2bc + 6000111 commit 5a372a1
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 31 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [0.6.6] - 2024-01-08

### Fixed

- Fix database export, which was using the wrong codes for age ranges

## [0.6.5] - 2024-01-08

### Added
Expand Down
9 changes: 5 additions & 4 deletions src/hapi/pipelines/database/age_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class AgeRange(BaseUploader):
def __init__(self, session: Session, age_range_codes: List[str]):
super().__init__(session)
self.data = age_range_codes
self.patterns = []
self.pattern_to_code = {}

def populate(self):
logger.info("Populating age ranges table")
Expand All @@ -27,14 +27,15 @@ def populate_single(self, age_range_code: str):
if len(ages) == 2:
# Format: 0-5
age_min, age_max = int(ages[0]), int(ages[1])
pattern_string = f"age{age_min}_{age_max}"
pattern = f"age{age_min}_{age_max}"
else:
# Format: 80+
age_min = int(age_range_code.replace("+", ""))
age_max = None
pattern_string = f"age{age_min}plus"
pattern = f"age{age_min}plus"
age_range_row = DBAgeRange(
code=age_range_code, age_min=age_min, age_max=age_max
)
self._session.add(age_range_row)
self.patterns.append(TagPattern.parse(f"#*+{pattern_string}"))
tagpattern = TagPattern.parse(f"#*+{pattern}")
self.pattern_to_code[tagpattern] = age_range_code
5 changes: 3 additions & 2 deletions src/hapi/pipelines/database/gender.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ def __init__(self, session: Session, gender_descriptions: Dict[str, str]):
super().__init__(session)
self._gender_descriptions = gender_descriptions
self.data = []
self.patterns = []
self.pattern_to_code = {}

def populate(self):
logger.info("Populating gender table")
for gender, description in self._gender_descriptions.items():
gender_row = DBGender(code=gender, description=description)
self._session.add(gender_row)
self.data.append(gender)
self.patterns.append(TagPattern.parse(f"#*+{gender}"))
tagpattern = TagPattern.parse(f"#*+{gender}")
self.pattern_to_code[tagpattern] = gender
self._session.commit()
40 changes: 21 additions & 19 deletions src/hapi/pipelines/database/humanitarian_needs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,25 @@ def __init__(
super().__init__(session)
self._metadata = metadata
self._admins = admins
self.population_status_patterns = population_status.patterns
self.population_group_patterns = population_group.patterns
self.sector_patterns = sector.patterns
self.gender_patterns = gender.patterns
self.age_range_patterns = age_range.patterns
self.population_status_pattern_to_code = (
population_status.pattern_to_code
)
self.population_group_pattern_to_code = (
population_group.pattern_to_code
)
self.sector_pattern_to_code = sector.pattern_to_code
self.gender_pattern_to_code = gender.pattern_to_code
self.age_range_pattern_to_code = age_range.pattern_to_code
self.disabled_pattern = TagPattern.parse("#*+disabled")
self._results = results

def populate(self):
logger.info("Populating humanitarian needs table")

def match_column(col, patterns):
for pattern in patterns:
def match_column(col, pattern_to_code):
for pattern in pattern_to_code:
if pattern.match(col):
result = pattern.tag
if result and result != "#*":
return result[1:]
result = pattern.include_attributes
if result:
return next(iter(result))
break
return pattern_to_code[pattern]
return None

for dataset in self._results.values():
Expand All @@ -70,23 +68,27 @@ def match_column(col, patterns):
column = Column.parse(hxl_tag)
# "#inneed" "#affected"
population_status_code = match_column(
column, self.population_status_patterns
column, self.population_status_pattern_to_code
)
if not population_status_code:
raise ValueError(f"Invalid HXL tag {hxl_tag}!")
# "#*+idps" "#*+refugees"
population_group_code = match_column(
column, self.population_group_patterns
column, self.population_group_pattern_to_code
)
# "#*+wsh" "#*+pro_gbv"
sector_code = match_column(column, self.sector_patterns)
sector_code = match_column(
column, self.sector_pattern_to_code
)
if sector_code:
sector_code = sector_code.upper()
# "#*+f" "#*+m"
gender_code = match_column(column, self.gender_patterns)
gender_code = match_column(
column, self.gender_pattern_to_code
)
# "#*+age0_4" "#*+age80plus"
age_range_code = match_column(
column, self.age_range_patterns
column, self.age_range_pattern_to_code
)
# "#*+disabled"
disabled_marker = self.disabled_pattern.match(column)
Expand Down
5 changes: 3 additions & 2 deletions src/hapi/pipelines/database/population_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(
super().__init__(session)
self._population_group_descriptions = population_group_descriptions
self.data = []
self.patterns = []
self.pattern_to_code = {}

def populate(self):
logger.info("Populating population group table")
Expand All @@ -30,5 +30,6 @@ def populate(self):
)
self._session.add(population_group_row)
self.data.append(population_group)
self.patterns.append(TagPattern.parse(f"#*+{population_group}"))
tagpattern = TagPattern.parse(f"#*+{population_group}")
self.pattern_to_code[tagpattern] = population_group
self._session.commit()
5 changes: 3 additions & 2 deletions src/hapi/pipelines/database/population_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(
super().__init__(session)
self._population_status_descriptions = population_status_descriptions
self.data = []
self.patterns = []
self.pattern_to_code = {}

def populate(self):
logger.info("Populating population status table")
Expand All @@ -30,5 +30,6 @@ def populate(self):
)
self._session.add(population_status_row)
self.data.append(population_status)
self.patterns.append(TagPattern.parse(f"#{population_status}"))
tagpattern = TagPattern.parse(f"#{population_status}")
self.pattern_to_code[tagpattern] = population_status
self._session.commit()
5 changes: 3 additions & 2 deletions src/hapi/pipelines/database/sector.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(
self._datasetinfo = datasetinfo
self.data = {}
self._sector_map = sector_map
self.patterns = []
self.pattern_to_code = {}

def populate(self):
logger.info("Populating sector table")
Expand All @@ -45,7 +45,8 @@ def populate(self):
)
self._session.add(sector_row)
pattern = code.lower().replace("-", "_")
self.patterns.append(TagPattern.parse(f"#*+{pattern}"))
pattern = TagPattern.parse(f"#*+{pattern}")
self.pattern_to_code[pattern] = code
self._session.commit()

def get_sector_code(self, sector: str) -> str:
Expand Down

0 comments on commit 5a372a1

Please sign in to comment.