-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcensus.py
98 lines (69 loc) · 2.74 KB
/
census.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import json
import requests
import pandas as pd
import pymongo
from pymongo import MongoClient
from config import jc_mongo_username, jc_mongo_password
# Open the shared MongoDB connection using the credentials imported from config.
# NOTE(review): pymongo expects the username and password inside a URI to be
# percent-escaped - confirm the config values contain no reserved characters.
client = MongoClient(
    f"mongodb+srv://{jc_mongo_username}:{jc_mongo_password}"
    "@techdata.hvqxz.mongodb.net/<dbname>?retryWrites=true&w=majority"
)
def cloud_collection(database, collection):
    """Return the collection ``collection`` of database ``database``.

    Both names are looked up on the module-level ``client`` connection.
    """
    return client[database][collection]
# Run to load data into the mongodb cloud
def load_df(df, database, collection):
    """Insert every row of ``df`` as a document into a MongoDB collection.

    The index is converted into a regular column first so it is stored with
    each document. The caller's DataFrame is not modified.

    Args:
        df: pandas DataFrame whose rows are uploaded.
        database: name of the target database.
        collection: name of the target collection.
    """
    # Set collection to load data into
    db_c = cloud_collection(database, collection)
    # reset_index() without inplace returns a new frame, so the caller's
    # DataFrame stays untouched and repeated calls upload the same columns.
    records = df.reset_index().to_dict('records')
    # insert_many rejects an empty document list, so an empty frame is a no-op.
    if not records:
        return
    db_c.insert_many(records)
def readMongoCensus(database, collection):
    """Read a whole MongoDB collection into a pandas DataFrame.

    Args:
        database: name of the database to read from.
        collection: name of the collection to read.

    Returns:
        A DataFrame built from every document in the collection, with the
        ``_id`` column removed. An empty collection gives an empty DataFrame.
    """
    # Load cloud collection from cloud
    db_c = cloud_collection(database, collection)
    # Read collection to a pandas dataframe
    db_df = pd.DataFrame(list(db_c.find()))
    # An empty collection produces a frame with no columns; errors='ignore'
    # keeps that case from raising KeyError on the missing '_id' column.
    return db_df.drop(columns=['_id'], errors='ignore')
def table_df(table_id, geo_str, acs="latest"):
    """Fetch one table and express each column as a percentage of its total.

    Relies on two module-level names that must be defined before calling:
    ``base_url`` (prefix of the request URL) and ``geo_ids`` (a mapping of
    geo id -> display name). In this file they only appear in the
    commented-out run section.

    Args:
        table_id: table identifier, e.g. "B15003".
        geo_str: comma-delimited geo ids sent in the request.
        acs: release segment placed in the URL right after ``base_url``.

    Returns:
        A DataFrame with one column per display name in ``geo_ids`` and one
        row per table column name; each value is
        100 * estimate / total estimate for that geo.

    Raises:
        requests.HTTPError: if the API answers with an error status.
        KeyError: if the response lacks the table, a geo id, or the total
            column.
    """
    request_url = f"{base_url}{acs}?table_ids={table_id}&geo_ids={geo_str}"
    # Bound the wait and fail loudly on HTTP errors instead of surfacing a
    # confusing KeyError from an error payload further down.
    response = requests.get(request_url, timeout=30)
    response.raise_for_status()
    # Named ``payload`` so the imported ``json`` module is not shadowed.
    payload = response.json()

    columns = payload['tables'][table_id]['columns']
    col_names = {col_id: meta['name'] for col_id, meta in columns.items()}

    # Denominator column: "<table_id>001". For table B15003 this is the
    # previously hard-coded "B15003001".
    # NOTE(review): assumes column 001 is the table total for every table
    # requested - confirm for tables other than B15003.
    total_col = f"{table_id}001"

    # {city_name1: {col_name1: pct1, col_name2: pct2}, city_name2: {...}}
    data = {}
    for geo, city_name in geo_ids.items():
        estimates = payload['data'][geo][table_id]['estimate']
        total = estimates[total_col]
        data[city_name] = {
            name: 100 * estimates[col_id] / total
            for col_id, name in col_names.items()
        }
    return pd.DataFrame(data)
#####
# RUN SECTION FOR CREATING DATAFRAME AND IMPORTING TO MONGODB
#####
#
# # geoids of San Fran, Atlanta, Chicago, NYC, and Austin
# geo_ids = {"16000US0667000":"San Francisco, CA",
# "16000US1304000":"Atlanta, GA",
# "16000US1714000":"Chicago, IL",
# "16000US3651000":"New York, NY",
# "16000US4805000":"Austin, TX"}
# # Create comma delimited string of geo_ids
# geo_str = ",".join(geo_ids.keys())
# base_url = "https://api.censusreporter.org/1.0/data/show/"
# tables = {
# "B15003":"Education Attainment",
# "B08301":"Means of Transportation",
# "B08303":"Time to travel to work"
# }
# # loop through the years for the dataframe
# for year in years:
# edu_df = table_df("B15003",geo_str,year)
# edu_df = edu_df.transpose()
# print(edu_df)
# # Load data into database
# load_df(edu_df,"censusData","2019 Education Attainment")