-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcreate_db.py
98 lines (75 loc) · 2.68 KB
/
create_db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import pandas
import sqlite3
import json
from sqlalchemy import create_engine
def print_cols(df):
for x in df.columns:
print x
#print_cols(df)
def create_query():
return ''
# Example query:
# ''' CREATE TABLE stocks (date text, symbol text, price real);
# INSERT INTO stock VALUES () '''
import numpy as np
def generate_mappings(df):
with open("waterbodies.js","w") as f:
f.write("waterbodies = ")
f.write(json.dumps(df[["WATERBODY_CODE","GUIDE_LOCNAME_ENG"]].iloc[np.unique(df["WATERBODY_CODE"].values, return_index=1)[1]].values.tolist()))
with open("species.js","w") as f:
f.write("species = "+json.dumps(df[["SPECIES_CODE","SPECNAME"]].iloc[np.unique(df["SPECIES_CODE"].values, return_index=1)[1]].values.tolist()))
f.write(json.dumps(df[["SPECIES_CODE","SPECNAME"]].iloc[np.unique(df["SPECIES_CODE"].values, return_index=1)[1]].values.tolist()))
return
def make_hack_txt(df,output_filename,col,max_rows=None):
# gets all unique values in a column and dumps to outfile
if max_rows:
trunc_df = df[:max_rows]
else:
trunc_df = df
with open(output_filename,"w") as f:
#print np.unique(df["WATERBODY_CODE"].values, return_index=1)[1]
vals = np.unique(trunc_df.iloc[0:][col].values)
string = ""
i = 0
for item in vals:
string += "<option value=\"" + item + "\">" + item + "</option>\n"
#string += "<option value=\"" + str(i) + "\">" + item + "</option>\n"
i+=1
f.write(string)
def generate_mappings_bootstrap_hack(df):
make_hack_txt(df,"waterbodies.txt","GUIDE_LOCNAME_ENG")
make_hack_txt(df,"fish.txt","SPECNAME")
make_hack_txt(df,"lengths.txt","LENGTH_CATEGORY_LABEL")
#f.write(json.dumps(df[["WATERBODY_CODE",]].values.tolist()))
def try_it(df):
with open("test.txt", "w") as f:
for row in df.iterrows():
#print row[1]
f.write("[" + str(row[1]["WATERBODY_CODE"]) + "," + str(row[1]["SPECIES_CODE"]) + "," + str(1) + "]\n")
def create_postgres_db(df):
df.columns = [c.lower() for c in df.columns] #postgres doesn't like capitals or spaces
engine = create_engine('postgresql://username:password@localhost:5432/dbname')
df.to_sql("my_table_name", engine)
def create_db(df):
conn = sqlite3.connect('example_small.db')
conn.text_factory = str;
c = conn.cursor()
#query = create_query()
#c.execute(query)
#conn.commit() # may need to uncomment
### comment out these lines to avoid using the toy db
df = df.iloc[:,[0,1,3,4,6,8,10,13,14]]
print df.columns
df = df.iloc[0:100]
###
df.to_sql("fishes", conn, if_exists="replace")
conn.close()
def main():
df = pandas.read_csv("FishGuideRawData.csv")
#generate_mappings(df)
print_cols(df)
df = df[[""]]
generate_mappings_bootstrap_hack(df)
try_it(df)
#create_db(df)
main()