-
Notifications
You must be signed in to change notification settings - Fork 0
/
update-transaction-data.py
196 lines (173 loc) · 8.28 KB
/
update-transaction-data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# update-transaction-data.py
#
# This app looks for a CSV file exported from mint
from __future__ import print_function
import os.path
from datetime import datetime, timedelta
import pandas as pd
import re
# Sheets Authentication and access modules
import gspread
from oauth2client.service_account import ServiceAccountCredentials
SCOPES = ["https://spreadsheets.google.com/feeds",'https://www.googleapis.com/auth/spreadsheets',"https://www.googleapis.com/auth/drive.file","https://www.googleapis.com/auth/drive"]
# ToDo - read these globals in from environment or config
MINT_DATA = 'mint-transactions.csv'
# Number of days to "look back" after finding the first mint
# transaction that exists in the google sheets already
REQUIRE_UNIT_WITH_UTIL_TRANSACTIONS = True
# Number of days to "look back" after finding the first mint
# transaction that exists in the google sheets already
LOOK_BACK_DAYS = 7
# Update local transactions.csv if new data is found
UPDATE_LOCAL_TRANSACTIONS = True
LOCAL_TRANSACTIONS_FILE = 'transactions.csv'
# If modifying these scopes, delete the file token.pickle.
# SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
# The ID and Tab name that contains the propery transactions
SPREADSHEET_ID = '1upOBoNVZAOl_Ulcgd3v0V2qfoo-NE1hVzmJVm1gBNGc'
RANGE_NAME = 'All Transactions'
def pull_sheet_data(SCOPES,SPREADSHEET_ID,RANGE_NAME):
"""Pulls the Spreadsheet and range specified via
the Google Sheets API and returns the data
"""
print('Fetching existing google sheets transaction data...')
creds = ServiceAccountCredentials.from_json_keyfile_name("sheets-cred.json", SCOPES)
client = gspread.authorize(creds)
sheet = client.open_by_key(SPREADSHEET_ID)
worksheet = sheet.worksheet(RANGE_NAME)
values = worksheet.get_all_records()
if not values:
print('No data found.')
else:
print('Google sheets transaction data download complete.')
df = pd.DataFrame(values, columns=values[0])
df['Amount'] = df['Amount'].astype(float)
df['Date'] = pd.to_datetime(df['Date']).dt.date
df.set_index(['Date'], inplace=True)
df.sort_index(inplace=True, ascending=False)
# print(df.head())
return df
def read_mint_data(MINT_DATA):
"""Reads the raw data from a transactions
file exported from mint into a dataframe
"""
# Load the transaction data from the csv into a dataframe
parse_dates = ['Date']
dtype_dic = {'Amount':str}
df = pd.read_csv(MINT_DATA, parse_dates=parse_dates, dtype=str)
df['Date'] = pd.to_datetime(df['Date']).dt.date
df['Amount'] = df['Amount'].astype(float)
df.set_index(['Date'], inplace=True)
df.sort_index(inplace=True, ascending=False)
return df
def add_rows_to_sheets(SCOPES,SPREADSHEET_ID,RANGE_NAME, new_rows):
"""Writes the new rows to the Sheet
"""
creds = ServiceAccountCredentials.from_json_keyfile_name("sheets-cred.json", SCOPES)
client = gspread.authorize(creds)
sheet = client.open_by_key(SPREADSHEET_ID)
worksheet = sheet.worksheet(RANGE_NAME)
response = worksheet.append_rows(new_rows, value_input_option='USER_ENTERED')
# print(response)
if 'updates' in response and response['updates']['updatedRows'] == len(new_rows):
print('Sheets update was succesful. Sorting....')
# Sort the data so that newest transactions are at the top
# response.updates.updatedRange will be something like 'All Transactions'!A619:J619
# Use what is left of the colon to determine the last row of the sort range
lastRowNum = int(re.findall(r'\D\d+$', response['updates']['updatedRange'])[0][1:])
lastColLetter = re.findall(r'\D\d+$', response['tableRange'])[0][0:1]
sort_range = 'A2:'+lastColLetter+str(lastRowNum)
response = worksheet.sort((1, 'des'), range=sort_range)
#print(response)
if 'spreadsheetId' in response:
print('Sheets updated and sorted.')
else:
print('Sheets sort failed:')
print(response)
print('Please update manually.')
else:
print('Unexpected response when adding new rows to sheets:')
print(response)
def process_labels(date, row):
"""Evaluate mint "labels" and add the sheets
style "Labels" and "Unit" fields as appropriate
"""
labels = row.Labels.split()
row.Labels = ''
for label in labels:
if label.startswith('Label-'):
row.Labels = label[6:]
if label.startswith('Unit-'):
row.Unit = label[5:]
if row.Labels == '':
print('WARNING: Transaction', row.Description,'from',date,'is missing a label. Data cleanup needed.')
return pd.Series([], dtype=object)
if (row.Labels == 'Rent') and row.Unit == '':
print('WARNING: Rent Transaction', row.Description,'from',date,'is missing a unit. Data cleanup needed.')
return pd.Series([], dtype=object)
if REQUIRE_UNIT_WITH_UTIL_TRANSACTIONS and (row.Labels == 'Utilities') and row.Unit == '':
print('WARNING: Utilities Transaction', row.Description,'from',date,'is missing a unit. Data cleanup needed.')
return pd.Series([], dtype=object)
return row
def write_sheets_data_locally(SCOPES,SPREADSHEET_ID,RANGE_NAME, LOCAL_TRANSACTIONS_FILE):
"""Fetch the google sheets data and
write it to a local csv file
"""
# ToDo its probably more efficient to update the existing DF in memory
# rather than read the whole thing in from sheets again..
print('Attempting to update local', LOCAL_TRANSACTIONS_FILE)
sheets_df = pull_sheet_data(SCOPES,SPREADSHEET_ID,RANGE_NAME)
sheets_df.to_csv(LOCAL_TRANSACTIONS_FILE)
print('Local', LOCAL_TRANSACTIONS_FILE,'updated.')
# Read in all the labeled transaction data from google sheets
sheets_df = pull_sheet_data(SCOPES,SPREADSHEET_ID,RANGE_NAME)
# print(sheets_df.head())
# print(sheets_df.info())
# Read in the latest transactions exported from mint
mint_df = read_mint_data(MINT_DATA)
mint_df.fillna(value='', inplace=True) # Get rid of NaN which sheets doesn't like
mint_df.insert(7, 'Unit', '') # Add the missing "Unit" column to the mint values
# print(mint_df.head())
# Iterate through the newest transactions until we find a duplicate
new_rows = []
lookback = ''
# Don't add new transactions if warning(s) are found
bad_data = False;
for i, row in mint_df.iterrows():
if lookback and i < lookback:
# We have found all the new transactions (if any)
if bad_data:
print('Please address warnings above in Mint and re-export', MINT_DATA);
elif len(new_rows):
print('Attempting to insert',len(new_rows),'new rows...')
add_rows_to_sheets(SCOPES,SPREADSHEET_ID,RANGE_NAME, new_rows)
if UPDATE_LOCAL_TRANSACTIONS:
write_sheets_data_locally(SCOPES,SPREADSHEET_ID,RANGE_NAME, LOCAL_TRANSACTIONS_FILE)
else:
print('No new transaction data found. Google Sheets data unchanged')
quit()
# Check to see if the entry exists already in mint
if sheets_df[(sheets_df.index == i) & (sheets_df['Description'] == row['Description']) & (sheets_df.Amount == float(row.Amount))].empty:
# This is new. Convert the date to the string format google uses
date = i.strftime('%-m/%-d/%Y')
# Convert the overloaded mint labels into
# discrete "Label" and "Unit" columns
row = process_labels(date, row)
if row.empty:
# Force data cleanup in mint before exporting to sheets
bad_data = True;
if not bad_data:
print('Found new transaction:',date, row.Description, row.Amount, row.Labels, row.Unit)
new_row = row.tolist()
new_row.insert(0, date)
# print(new_row)
new_rows.append(new_row)
else:
if lookback == '':
# We found the first mint transaction that exists in google sheets
# We will keep comparing LOOK_BACK_DAYS days more worth of transaction in case
# an an older transaction was added since the last update
lookback = i - timedelta(days=LOOK_BACK_DAYS)
print('Found first mint transaction that exists in sheets.')
# print(date, row.Description, row.Amount)
print('Will keep checking mint transactions from', lookback, 'and earlier.')