-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathto_sqlite.R
55 lines (38 loc) · 1.32 KB
/
to_sqlite.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env Rscript
library(readxl)
library(RSQLite)
xlsx = "SQL_Data_Set_01_April_10_18.xlsx"
sheet_names = tolower(excel_sheets(xlsx))
sheet_names[c(2, 4)] = c("company_name", "company_info")
args = commandArgs(trailingOnly = TRUE)
DB = args[1]
db = dbConnect(SQLite(), DB)
for (i in 2:4) {
sheet = read_xlsx(xlsx, i)
# Standardize the column names.
names = gsub("\\s", "_", tolower(names(sheet)))
names = gsub("__", "_", names, fixed = TRUE)
names = gsub("^_|_$", "", names)
key = match("key", names)
if (key)
names[key] = "ticker"
names(sheet) = names
# Remove erroneous columns caused by "source" link
if (i == 3)
sheet = sheet[-c(9, 10, 11)]
message(paste0(names(sheet), sep = ", "))
dbWriteTable(db, sheet_names[[i]], sheet)
}
# Mon Apr 30 13:06:49 PDT 2018
# CF: Adding in a couple new sources of data.
daily = read.csv("all_stocks_5yr.csv", stringsAsFactors = FALSE)
# Never mind, SQLite won't handle dates.
#daily$date = strptime(daily$date, "%Y-%m-%d")
dbWriteTable(db, name = "daily_share_prices", daily)
sic = read.csv("sic-codes.csv")
dbWriteTable(db, name = "sic", sic)
fortune500 = read.csv("Fortune500Locations.csv")
dbWriteTable(db, name = "company_locations", fortune500)
state = read.csv("statePopulations.csv")
dbWriteTable(db, name = "state_populations", state)
dbDisconnect(db)