-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGetISTIDistances_FEB2015.py
240 lines (207 loc) · 9.7 KB
/
GetISTIDistances_FEB2015.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#!/usr/local/sci/bin/python
#***************************************
# 06 June 2014 KMW - v1
#
# Run of distances to narrow to final station listing
# Loop through ISTI station inventory
# calculate the distance between every station
# output:
# Closest 100 stations in order: stA st1 dist1 st2 dist2 etc
# Closest 1000 stations in order: stA st1 dist1 st2 dist2 etc
# Complete distance matrix in 9+ 10000 by 10000 station files
#
# Find all locations that match
# Remove the matching station from the Distance Lists and
# do not include later on
# Make new INVENTORY list and list bad stations
#
# Output 40 nearest neighbours for each station: FIXCORRNEIGHBOURS...
#
#************************************************************************
# START
#************************************************************************
# USE python2.7
# python2.7 GetISTIDistances_FEB2015.py
#
# REQUIRES
# Geography.py
#************************************************************************
# Set up python imports
import datetime as dt
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.dates import date2num,num2date
import sys, os
from scipy.optimize import curve_fit,fsolve,leastsq
from scipy import pi,sqrt,exp
from scipy.special import erf
import scipy.stats
from math import sqrt,pi
import struct
import pdb # for stopping and restarting with editability (stop is pdb.set_trace(),restart is c)
from Geography import TwoPointDistanceKm
# RESTART VALUE
# Station ID to resume from. The main loop skips every station until it meets
# this ID and then resets the value to '------'; leaving it as '------' means
# "no restart" and processing begins with the first station in the list.
Restarter='------' #'------' #' 01010000' station ID
# Set up file locations
# Input: the station list read by ReadData in the main program
STATLIST='/data/local/hadkw/ISTI/LISTS/v101_JUL2015/ISTILONGS_stage3proxyelevs_JUL2015.dat'
# Output (appended to): candidate ID plus ID/distance pairs for the first 100 sorted neighbours
OUThundred='/data/local/hadkw/ISTI/LISTS/v101_JUL2015/ISTILONGDISTANCES_hundred_stage3proxyelevs_JUL2015.dat'
# Output (appended to): as above but for the first 1000 sorted neighbours
OUTthousand='/data/local/hadkw/ISTI/LISTS/v101_JUL2015/ISTILONGDISTANCES_thousand_stage3proxyelevs_JUL2015.dat'
# Output (appended to): inventory rows of stations with no zero-distance match
OUTGOODS='/data/local/hadkw/ISTI/LISTS/v101_JUL2015/ISTILONGINVENTORY_stage3proxyelevs_JUL2015.dat'
# Output (appended to): inventory rows of stations that had a zero-distance (location) match
OUTBADS='/data/local/hadkw/ISTI/LISTS/v101_JUL2015/ISTILONGSLOCMATCH_stage3proxyelevs_JUL2015.dat'
# Output (appended to): station ID followed by the IDs of its first 40 sorted neighbours
OUTneighbours='/data/local/hadkw/ISTI/LISTS/BAWG/FIXCORRNEIGHBOURS_ISTI_stage3proxyelevs_JUL2015.dat'
# Counters - placeholders here, given real values in the main program
nstations=0 # defined after reading in station list
ngoods=0 # stations passing unique location criteria
nbads=0 # shortest stations with matching locs
# Placeholders - replaced by numpy arrays once the station list has been read
StationIDs=[] # nstations list filled after reading in station list
StationLats=[] # nstations list filled after reading in station list
StationLons=[] # nstations list filled after reading in station list
# NOTE(review): StatDistsAll is not referenced again in the visible code
# (the main loop uses the name StationDistsAll instead) - confirm it is unused.
StatDistsAll=[]
# Both of these are re-created for every station inside the main loop
StatDistsSorted=[]
StatIDsSorted=[]
#************************************************************************
# Subroutines
#************************************************************************
# READDATA
def ReadData(FileName,typee,delimee,ASTruth,ColumnChoice):
    ''' Read every row of a text file into an array with numpy.genfromtxt.

    FileName     : file to read, handed straight to genfromtxt
    typee        : dtype specification for the columns (genfromtxt dtype)
    delimee      : field delimiter or list of fixed field widths (genfromtxt delimiter)
    ASTruth      : True to strip leading/trailing whitespace from every field
                   (genfromtxt autostrip) - honoured whichever columns are read
    ColumnChoice : columns to keep (genfromtxt usecols), or the string 'XXX'
                   to read all columns

    Returns whatever genfromtxt returns. For a multi-column dtype without
    names this is a structured array whose fields are addressed by the
    default names f0, f1, ...

    The ISTI inventory uses '#', which genfromtxt would by default treat as
    the start of a comment and so ignore the rest of the line. The comment
    marker is therefore switched to '%' - hopefully there is no '%' in ISTI.
    '''
    # 'XXX' is the sentinel for "all columns"; usecols=None is genfromtxt's
    # own way of saying the same thing, so a single call covers both cases
    # and autostrip can no longer be dropped on the all-columns path.
    if ColumnChoice == 'XXX':
        TheColumns=None
    else:
        TheColumns=ColumnChoice
    return np.genfromtxt(FileName,dtype=typee,comments="%",delimiter=delimee,
                         autostrip=ASTruth,usecols=TheColumns) # ReadData
#************************************************************************
# GETDISTANCEARRAYS
def GetDistanceArrays(TheCID,TheDistsAll,TheSortedDists,TheSortedIDs,
                      TheCLat,TheCLon,TheLats,TheLons,TheIDs,TheSCount,AnyBads):
    ''' Distances from one candidate station to all listed stations, nearest first.

    TheCID           : ID of the candidate station
    TheCLat, TheCLon : candidate latitude and longitude
    TheLats, TheLons : latitudes and longitudes of the stations to measure to
    TheIDs           : IDs of those stations, in the same order (numpy array)
    TheDistsAll, TheSortedDists, TheSortedIDs, TheSCount, AnyBads :
                       kept for interface compatibility only - their incoming
                       values are never read

    Returns a 4-tuple:
      distances in the input station order (candidate included),
      distances sorted ascending with the candidate's own entry removed,
      station IDs in that same sorted order,
      indices into the sorted distances that are exactly 0.0 (location matches).
    '''
    # TwoPointDistanceKm comes from Geography.py - presumably returns one
    # distance (km) per entry of TheLats/TheLons; confirm against that module
    AllDists=TwoPointDistanceKm(TheCLat,TheCLon,TheLats,TheLons)

    # Rank every station by its distance from the candidate
    NearestFirst=np.argsort(AllDists)
    RankedDists=AllDists[NearestFirst]
    RankedIDs=TheIDs[NearestFirst]

    # Take out the candidate's distance to itself
    SelfSpots=np.where(TheCID == RankedIDs)[0]
    RankedIDs=np.delete(RankedIDs,SelfSpots)
    RankedDists=np.delete(RankedDists,SelfSpots)

    # Anything still at 0.0 is a different station at the same location.
    # The caller uses a non-empty result to drop the candidate, so when
    # several stations share a location all but the last are removed.
    ZeroSpots=np.where(RankedDists == 0.0)[0]

    return AllDists,RankedDists,RankedIDs,ZeroSpots # GetDistanceArrays
#************************************************************************
# WRITEOUTSORTED
def WriteOutSorted(TheStDists,TheStIDs,TheFile,TheCandidateID):
    ''' Write one text line: the candidate ID followed by ID/distance pairs.

    TheStDists     : distances, in the same order as TheStIDs
    TheStIDs       : neighbour station IDs
    TheFile        : open writable file handle (anything np.savetxt accepts)
    TheCandidateID : ID of the station the distances were measured from

    Line layout, space separated:  stA st1 dist1 st2 dist2 ...
    Every distance is written with 3 decimals, right-justified in 12 characters.

    Raises ValueError if TheStDists and TheStIDs differ in length.
    '''
    # Convert all distances to set length strings
    TheStDistsStr=["{:12.3f}".format(dd) for dd in TheStDists]
    # The pairs must line up one-to-one; zip() below would silently truncate
    if len(TheStDistsStr) != len(TheStIDs):
        raise ValueError("TheStDists and TheStIDs must be the same length")
    # Interleave the IDs and distances explicitly (st1,dist1,st2,dist2,...).
    # Reshaping the result of zip() only works where zip() returns a list
    # (Python 2); building the row by hand runs on Python 2 and 3 alike.
    TheLine=[TheCandidateID]
    for OneID,OneDist in zip(TheStIDs,TheStDistsStr):
        TheLine.append(OneID)
        TheLine.append(OneDist)
    # savetxt writes one output line per row, so hand it a single-row 2-D array
    goo=np.reshape(np.array(TheLine),(1,len(TheLine)))
    np.savetxt(TheFile,goo,fmt='%s',delimiter=' ')
    return #WriteOutSorted
#***********************************************************************
# WRITEOUT
def WriteOut(TheStIDs,TheFile,TheStationID):
    ''' Write one text line: a station ID followed by its neighbours' IDs.

    TheStIDs     : neighbour station IDs (the caller passes the nearest 40)
    TheFile      : open writable file handle (anything np.savetxt accepts)
    TheStationID : ID of the station the neighbours belong to

    Surrounding white space is stripped from every ID and the IDs are
    written space separated on a single line.
    '''
    # Station first, then each neighbour, all with the padding removed
    CleanIDs=[TheStationID.strip()]
    for OneID in TheStIDs:
        CleanIDs.append(OneID.strip())
    # savetxt writes one output line per row, so shape the IDs as a single row
    OneRow=np.array(CleanIDs).reshape(1,len(CleanIDs))
    np.savetxt(TheFile,OneRow,fmt='%s',delimiter=' ')
    return #WriteOut
#***********************************************************************
# MAIN PROGRAM
#***********************************************************************
# Main program: read the station list, then for each station get its sorted
# distances to the stations still in the working (COPY) list, decide whether
# it is a location match, and append to the appropriate output files.
# NOTE(review): this copy of the file has lost its indentation, so the extent
# of the if/else bodies below cannot be read from the layout - confirm against
# a properly indented copy before restructuring anything here.
# read in station list
# 14 fixed-width columns: MyTypes gives each column's dtype and MyDelimiters
# its width in characters. The fields get the default names f0..f13.
MyTypes=("|S12","|S31","|S24","float","float","float","int","int","int","int","int","int","|S16","|S17")
MyDelimiters=[12,31,24,8,11,9,5,5,5,5,5,5,16,17]
# 'XXX' tells ReadData to read every column
MyColumns='XXX'
RawData=ReadData(STATLIST,MyTypes,MyDelimiters,False,MyColumns)
# f0 is used as the station ID, f3 as the latitude and f4 as the longitude;
# ndmin=1 keeps these as 1-D arrays even if the file holds a single station
StationIDs=np.array(RawData['f0'],ndmin=1)
StationLats=np.array(RawData['f3'],ndmin=1)
StationLons=np.array(RawData['f4'],ndmin=1)
# Working copies: location-matched stations are deleted from these as the
# loop progresses, while the originals above stay complete for indexing by ss
COPYStationIDs=np.array(RawData['f0'],ndmin=1)
COPYStationLats=np.array(RawData['f3'],ndmin=1)
COPYStationLons=np.array(RawData['f4'],ndmin=1)
nstations=len(StationIDs)
## Output the full matrix title of all station IDs
#if Restarter == '------':
# My_Fhandle=file(OUTmatrix+".dat",'a')
# goo=np.reshape(np.append(" STATIONID",StationIDs),(1,nstations+1))
# np.savetxt(My_Fhandle,goo,fmt='%s',delimiter=' ')
# My_Fhandle.close()
print(Restarter)
# NOTE(review): file() is the Python 2 built-in; it does not exist in
# Python 3, where open() would be needed. Both files are opened in append
# mode and stay open until after the loop.
My_FGhandle=file(OUTGOODS,'a')
My_FBhandle=file(OUTBADS,'a')
# Get the distances for each file individually relative to all others
# ngoods starts at the full station count and drops by one per location match
ngoods=nstations
for ss in range(nstations):
#print(StationIDs[ss])
# Restart handling: while Restarter holds a station ID, skip stations until
# that ID is reached, then clear it so later stations are processed.
# NOTE(review): stations skipped here are never removed from the COPY arrays,
# so a restarted run may see a different working list than the original run.
if Restarter != '------' and Restarter != StationIDs[ss]:
continue
else:
Restarter='------'
print(StationIDs[ss])
# sort out RawData format for outputting station list
# Re-format the numeric fields as fixed-width text matching the widths in
# MyDelimiters (8, 11, 9 and six of 5), then shape as one row for savetxt
outraw=list(RawData[ss])
outraw[3]="{:8.4f}".format(outraw[3])
outraw[4]="{:11.4f}".format(outraw[4])
outraw[5]="{:9.2f}".format(outraw[5])
outraw[6:12]=["{:5d}".format(dd) for dd in outraw[6:12]]
outraw=np.reshape(outraw,(1,len(outraw)))
#print(StationIDs[ss])
# Create appropriate size arrays
# NOTE(review): these four are all overwritten by the values returned from
# GetDistanceArrays below, so the pre-allocation has no effect on the result
StationDistsAll=np.zeros([ngoods])
StatDistsSorted=np.zeros([ngoods])
StatIDsSorted=np.empty([ngoods],dtype=object) # object allows strings of any length and other types
LocsMatch=[]
#print(StationLats[ss],StationLons[ss])
# Distances are measured against the COPY arrays, i.e. only against stations
# not already removed as location matches
StationDistsAll,StatDistsSorted,StatIDsSorted,LocsMatch=GetDistanceArrays(StationIDs[ss],
StationDistsAll,StatDistsSorted,StatIDsSorted,StationLats[ss],
StationLons[ss],COPYStationLats,COPYStationLons,COPYStationIDs,ngoods,LocsMatch)
# If there is a LocsMatch value then remove this station from the list
if len(LocsMatch) > 0:
nbads=nbads+1
ngoods=ngoods-1
#findit=np.array([np.where(StationIDs == i) for i in LocsMatch]) # match multiple elements
# Delete the current station (not the stations it matched) from the working
# arrays, so the last station at any shared location is the one that survives
findit=np.where(COPYStationIDs == StationIDs[ss])[0] # match single elements
COPYStationIDs=np.delete(COPYStationIDs,findit)
COPYStationLats=np.delete(COPYStationLats,findit)
COPYStationLons=np.delete(COPYStationLons,findit)
# output file to BAD list
# delimiter='' because the fields are already padded to their fixed widths
np.savetxt(My_FBhandle,outraw,fmt='%s',delimiter='')
print("FOUND A LOCMATCH: ",ngoods,nbads,len(COPYStationIDs))
else:
# output file to GOOD List
np.savetxt(My_FGhandle,outraw,fmt='%s',delimiter='')
# Output the sorted arrays
# Each output file is opened in append mode, written and closed again for
# every station. StCounts is assigned but never read in the visible code.
StCounts=100
My_Fhandle=file(OUThundred,'a')
WriteOutSorted(StatDistsSorted[0:100],StatIDsSorted[0:100],My_Fhandle,StationIDs[ss])
My_Fhandle.close()
StCounts=1000
My_Fhandle=file(OUTthousand,'a')
WriteOutSorted(StatDistsSorted[0:1000],StatIDsSorted[0:1000],My_Fhandle,StationIDs[ss])
My_Fhandle.close()
StCounts=40
My_Fhandle=file(OUTneighbours,'a')
WriteOut(StatIDsSorted[0:40],My_Fhandle,StationIDs[ss])
My_Fhandle.close()
# All stations done - close the GOOD and BAD inventory files
My_FBhandle.close()
My_FGhandle.close()
#stop()
print("And, we are done!")