Skip to content

Commit

Permalink
Merge pull request #936 from jdebacker/demog_token
Browse files Browse the repository at this point in the history
Merging
  • Loading branch information
rickecon authored Jun 11, 2024
2 parents e762b06 + 138a981 commit 4e6ae04
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 50 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ regression/OUTPUT_BASELINE/*
regression/OUTPUT_REFORM*
.vscode/
*default.profraw
*un_api_token.txt
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.11.8] - 2024-06-09 01:00:00

### Added

- Updates the `demographics.py` module to accept an API token for access to the UN World Population Prospects database and, when the UN request fails, to download the data from the [Population-Data](https://github.com/EAPD-DRB/Population-Data) repository instead.

## [0.11.7] - 2024-06-07 01:00:00

Expand Down Expand Up @@ -233,6 +238,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Any earlier versions of OG-USA can be found in the [`OG-Core`](https://github.com/PSLmodels/OG-Core) repository [release history](https://github.com/PSLmodels/OG-Core/releases) from [v.0.6.4](https://github.com/PSLmodels/OG-Core/releases/tag/v0.6.4) (Jul. 20, 2021) or earlier.


[0.11.8]: https://github.com/PSLmodels/OG-Core/compare/v0.11.7...v0.11.8
[0.11.7]: https://github.com/PSLmodels/OG-Core/compare/v0.11.6...v0.11.7
[0.11.6]: https://github.com/PSLmodels/OG-Core/compare/v0.11.5...v0.11.6
[0.11.5]: https://github.com/PSLmodels/OG-Core/compare/v0.11.4...v0.11.5
Expand Down
2 changes: 1 addition & 1 deletion ogcore/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@
from ogcore.txfunc import *
from ogcore.utils import *

__version__ = "0.11.7"
__version__ = "0.11.8"
153 changes: 106 additions & 47 deletions ogcore/demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,24 @@ def get_un_data(
+ "?format=csv"
)

# Check for a file named "un_api_token.txt" in the current directory
if os.path.exists(os.path.join("un_api_token.txt")):
with open(os.path.join("un_api_token.txt"), "r") as file:
UN_TOKEN = file.read().strip()
else: # if file not exist, prompt user for token
UN_TOKEN = input(
"Please enter your UN API token (press return if you do not have one): "
)
# write the UN_TOKEN to a file so it can be found on future runs
with open(os.path.join("un_api_token.txt"), "w") as file:
file.write(UN_TOKEN)

# get data from url
response = get_legacy_session().get(target)
payload = {}
headers = {"Authorization": "Bearer " + UN_TOKEN}
response = get_legacy_session().get(target, headers=headers, data=payload)
# Check if the request was successful before processing
if response.status_code == 200:

# if want to download the data
# with open("downloaded_datan.csv", "wb") as f:
# f.write(response.content)
# df = pd.read_csv("downloaded_datan.csv")
# else
# print("TARGET: ", target)
csvStringIO = StringIO(response.text)
df = pd.read_csv(csvStringIO, sep="|", header=1)

Expand All @@ -93,10 +100,40 @@ def get_un_data(
df.year = df.year.astype(int)
df = df[df.age < 100] # need to drop 100+ age category
else:
# Read from the EAPD-DRB/Population-Data GitHub repository instead:
print(
f"Failed to retrieve population data. HTTP status code: {response.status_code}"
f"Failed to retrieve population data from UN. Reading "
+ " from https://github.com/EAPD-DRB/Population-Data "
+ "instead of UN WPP API"
)
country_dict = {
"840": "USA",
"710": "ZAF",
"458": "MYS",
"356": "IND",
"826": "UK",
}
un_variable_dict = {
"68": "fertility_rates",
"80": "mortality_rates",
"47": "population",
}
country = country_dict[country_id]
variable = un_variable_dict[variable_code]
url = (
"https://raw.githubusercontent.com/EAPD-DRB/"
+ "Population-Data/main/"
+ "Data/{c}/UN_{v}_data.csv".format(c=country, v=variable)
)
assert False
df = pd.read_csv(url)
# keep just the years requested
df = df[(df.year >= start_year) & (df.year <= end_year)]

# Do we still want to keep the status code for failures?
# print(
# f"Failed to retrieve population data. HTTP status code: {response.status_code}"
# )
# assert False

return df

Expand Down Expand Up @@ -136,11 +173,15 @@ def get_fert(
"""
# initialize fert rates array
fert_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
# Read UN data, 1 year at a time
# Read UN data
df = get_un_data(
"68", country_id=country_id, start_year=start_year, end_year=end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 1):
df = get_un_data("68", country_id=country_id, start_year=y, end_year=y)
df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
# put in vector
fert_rates = df.value.values
fert_rates = df_y.value.values
# fill in with zeros for ages < 15 and > 49
# NOTE: this assumes min_age < 15 and max_age > 49
fert_rates = np.append(fert_rates, np.zeros(max_age - 49))
Expand All @@ -163,7 +204,7 @@ def get_fert(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_fert_rates(
[fert_rates_2D],
start_year=start_year,
Expand Down Expand Up @@ -219,10 +260,14 @@ def get_mort(
mort_rates_2D = np.zeros((end_year + 1 - start_year, totpers))
infmort_rate_vec = np.zeros(end_year + 1 - start_year)
# Read UN data
df = get_un_data(
"80", country_id=country_id, start_year=start_year, end_year=end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 1):
df = get_un_data("80", country_id=country_id, start_year=y, end_year=y)
df_y = df[(df.age >= min_age) & (df.age <= max_age) & (df.year == y)]
# put in vector
mort_rates_data = df.value.values
mort_rates_data = df_y.value.values
# In UN data, mortality rates for 0 year olds are the infant
# mortality rates
infmort_rate = mort_rates_data[0]
Expand All @@ -249,7 +294,7 @@ def get_mort(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_mort_rates_data(
mort_rates_2D,
start_year,
Expand Down Expand Up @@ -322,7 +367,7 @@ def get_pop(
"""
# Generate time path of the nonstationary population distribution
# Get path up to end of data year
pop_2D = np.zeros((end_year + 1 - start_year + 1, E + S))
pop_2D = np.zeros((end_year + 2 - start_year, E + S))
if infer_pop:
if pre_pop_dist is None:
pre_pop_data = get_un_data(
Expand All @@ -331,11 +376,17 @@ def get_pop(
start_year=start_year - 1,
end_year=start_year - 1,
)
if download_path:
pre_pop_data.to_csv(
os.path.join(download_path, "raw_pre_pop_data_UN.csv"),
index=False,
)
pre_pop_sample = pre_pop_data[
(pre_pop_data["age"] >= min_age)
& (pre_pop_data["age"] <= max_age)
]
pre_pop = pre_pop_sample.value.values
pre_pop_dist = pop_rebin(pre_pop, E + S)
else:
pre_pop = pre_pop_dist
if initial_pop is None:
Expand All @@ -350,6 +401,7 @@ def get_pop(
& (pre_pop_data["age"] <= max_age)
]
initial_pop = initial_pop_sample.value.values
initial_pop = pop_rebin(initial_pop, E + S)
# Check that have all necessary inputs to infer the population
# distribution
assert not [
Expand All @@ -374,22 +426,27 @@ def get_pop(
)
else:
# Read UN data
pop_data = get_un_data(
"47",
country_id=country_id,
start_year=start_year,
end_year=end_year
+ 2, # note go to + 2 because needed to infer immigration for end_year
)
# Clean and rebin data
for y in range(start_year, end_year + 2):
pop_data = get_un_data(
"47",
country_id=country_id,
start_year=y,
end_year=y,
)
pop_data_sample = pop_data[
(pop_data["age"] >= min_age) & (pop_data["age"] <= max_age)
(pop_data["age"] >= min_age)
& (pop_data["age"] <= max_age)
& (pop_data["year"] == y)
]
pop = pop_data_sample.value.values
# Generate the current population distribution given that E+S might
# be less than max_age-min_age+1
# age_per_EpS = np.arange(1, E + S + 1)
pop_EpS = pop_rebin(pop, E + S)
pop_2D[y - start_year, :] = pop_EpS

# get population distribution one year before initial year for
# calibration of omega_S_preTP
pre_pop_data = get_un_data(
Expand Down Expand Up @@ -527,25 +584,27 @@ def get_imm_rates(
assert fert_rates.shape == mort_rates.shape
assert infmort_rates is not None
assert infmort_rates.shape[0] == mort_rates.shape[0]
# Read UN data
for y in range(start_year, end_year + 1):
if pop_dist is None:
# need to read UN population data by age for each year
df = get_un_data(
"47", country_id=country_id, start_year=y, end_year=y
)
pop_t = df[(df.age < 100) & (df.age >= 0)].value.values
if pop_dist is None:
# need to read UN population data
df = get_un_data(
"47",
country_id=country_id,
start_year=start_year,
end_year=end_year + 2,
)
pop_dist = np.zeros((end_year + 2 - start_year, totpers))
for y in range(start_year, end_year + 1):
pop_t = df[
(df.age < 100) & (df.age >= 0) & (df.year == y)
].value.values
pop_t = pop_rebin(pop_t, totpers)
df = get_un_data(
"47", country_id=country_id, start_year=y + 1, end_year=y + 1
)
pop_tp1 = df[(df.age < 100) & (df.age >= 0)].value.values
pop_tp1 = pop_rebin(pop_tp1, totpers)
else:
# Make sure shape conforms
assert pop_dist.shape[1] == mort_rates.shape[1]
pop_t = pop_dist[y - start_year, :]
pop_tp1 = pop_dist[y - start_year + 1, :]
pop_dist[y - start_year, :] = pop_t
# Make sure shape conforms
assert pop_dist.shape[1] == mort_rates.shape[1]
assert pop_dist.shape[0] == end_year - start_year + 2
for y in range(start_year, end_year + 1):
pop_t = pop_dist[y - start_year, :]
pop_tp1 = pop_dist[y + 1 - start_year, :]
# initialize imm_rate vector
imm_rates = np.zeros(totpers)
# back out imm rates by age for each year
Expand All @@ -570,7 +629,7 @@ def get_imm_rates(

# Create plots if needed
if graph:
if plot_path:
if plot_path is not None:
pp.plot_imm_rates(
imm_rates_2D,
start_year,
Expand Down Expand Up @@ -640,7 +699,7 @@ def get_pop_objs(
pre_pop_dist=None,
country_id=UN_COUNTRY_CODE,
initial_data_year=START_YEAR - 1,
final_data_year=START_YEAR + 2, # as default data year goes until T1
final_data_year=START_YEAR + 2,
GraphDiag=True,
download_path=None,
):
Expand Down Expand Up @@ -710,8 +769,8 @@ def get_pop_objs(
final_data_year,
)
assert E + S <= max_age - min_age + 1
assert initial_data_year >= 2011 and initial_data_year <= 2100
assert final_data_year >= 2011 and final_data_year <= 2100
assert initial_data_year >= 2011 and initial_data_year <= 2100 - 1
assert final_data_year >= 2011 and final_data_year <= 2100 - 1
# Ensure that the last year of data used is before SS transition assumed
# Really, it will need to be well before this
assert final_data_year > initial_data_year
Expand Down
1 change: 0 additions & 1 deletion ogcore/txfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,6 @@ def txfunc_est(
phi1_init = 1.0
phi2_init = 1.0
params_init = np.array([phi0_init, phi1_init, phi2_init])
print("Initial phi0, phi1, phi2: ", params_init)
tx_objs = (
np.array([None]),
X,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="ogcore",
version="0.11.7",
version="0.11.8",
author="Jason DeBacker and Richard W. Evans",
license="CC0 1.0 Universal (CC0 1.0) Public Domain Dedication",
description="A general equilibribum overlapping generations model for fiscal policy analysis",
Expand Down
4 changes: 4 additions & 0 deletions tests/test_demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ def test_get_pop_objs_read_UN_data():
GraphDiag=False,
)

assert isinstance(pop_dict, dict)


def test_get_pop_objs():
"""
Expand Down Expand Up @@ -286,6 +288,8 @@ def test_get_imm_rates():
mort_rates=mort_rates,
infmort_rates=infmort_rates,
pop_dist=pop_dist,
start_year=2024,
end_year=2025,
graph=True,
)
assert imm_rates.shape[1] == S
Expand Down

0 comments on commit 4e6ae04

Please sign in to comment.