-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge-models.py
executable file
·117 lines (82 loc) · 4.48 KB
/
merge-models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
#------------------------------------------------------------------------------
# PROGRAM: merge-models.py
#------------------------------------------------------------------------------
# Version 0.1
# 22 September, 2023
# Michael Taylor
# https://patternizer.github.io
# michael DOT a DOT taylor AT uea DOT ac DOT uk
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# IMPORT PYTHON LIBRARIES
#------------------------------------------------------------------------------
# Dataframe libraries:
import numpy as np
import pandas as pd
#import xarray as xr
#import cftime
# OS libraries:
import os
import glob
#import sys
#import time
#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# SETTINGS:
#------------------------------------------------------------------------------
year_start, year_end = 1850, 2100
thresholds = ['threshold1', 'threshold2', 'threshold3', 'threshold4']

#------------------------------------------------------------------------------
# METHODS
#------------------------------------------------------------------------------

def parse_filename(path):
    """Split a run-file path into its underscore/dash separated name fields.

    The file name (directory and everything from the first '.' removed) is
    expected to look like ``<variable>_<parameter>-<projection>_<city>_<model>``.
    Any further underscore-separated fields are ignored.

    Parameters
    ----------
    path : str
        Path of a .pkl file, with or without a leading directory.

    Returns
    -------
    tuple of str
        ``(variable, parameter, projection, city, model)``.

    Raises
    ------
    ValueError
        If the name has fewer than four '_' fields or the second field has no
        '-' separated projection part.
    """
    # os.path.basename copes with the platform's path separator; splitting
    # on a literal '/' fails for the back-slashed paths glob returns on Windows.
    stem = os.path.basename(path).split('.')[0]
    words = stem.split('_')
    tags = words[1].split('-') if len(words) > 1 else []
    if len(words) < 4 or len(tags) < 2:
        raise ValueError(
            'unexpected filename (want '
            '<variable>_<parameter>-<projection>_<city>_<model>.pkl): ' + path
        )
    return words[0], tags[0], tags[1], words[2], words[3]


def annual_index(first_year, last_year):
    """Return 1 January timestamps for every year first_year..last_year inclusive.

    Parameters
    ----------
    first_year, last_year : int
        First and last calendar year of the index (both included).

    Returns
    -------
    pandas.DatetimeIndex
        Year-start timestamps, one per year.
    """
    # 'YS' is the year-start alias; the older spelling 'AS' is deprecated in
    # recent pandas releases.
    return pd.date_range(start=str(first_year), end=str(last_year), freq='YS')

#------------------------------------------------------------------------------
# LOAD: list the per-model .pkl timeseries files and extract the name fields
#------------------------------------------------------------------------------

filelist = sorted(glob.glob('RUN/*.pkl'))

variablelist = []
projectionlist = []
citylist = []
modellist = []

# Files grouped by (variable, parameter, city, projection) so each filename is
# parsed once rather than once per combination in the merge loops below.
# Within a group the sorted order of filelist is kept.
files_by_key = {}

for path in filelist:
    variable, parameter, projection, city, model = parse_filename(path)
    variablelist.append(variable)
    projectionlist.append(projection)
    citylist.append(city)
    modellist.append(model)
    files_by_key.setdefault((variable, parameter, city, projection), []).append((path, model))

variables = np.unique(np.array(variablelist))
projections = np.unique(np.array(projectionlist))
cities = np.unique(np.array(citylist))
models = np.unique(np.array(modellist))

# NOTE(review): this normalised list is not referenced again in this script,
# and the column lookup ds[model] below uses the un-normalised name taken from
# the filename - confirm the pickled column names match the filename spelling.
models = [name.replace('-', '_').lower() for name in models]

#------------------------------------------------------------------------------
# MERGE: one dataframe per variable / threshold / city / projection, with one
# column per model
#------------------------------------------------------------------------------

for variable in variables:
    for threshold in thresholds:
        for city in cities:
            for projection in projections:

                # INIT: standard dataframe for timeseries

                df = pd.DataFrame({'datetimes': annual_index(year_start, year_end)})

                key = (str(variable), threshold, str(city), str(projection))
                for path, model in files_by_key.get(key, []):

                    print(key[0], key[1], key[2], key[3], model)

                    # EXTRACT: timeseries at location and append to dataframe
                    # NOTE: unpickling can execute arbitrary code - only run
                    # this on files from a trusted source.

                    ds = pd.read_pickle(path, compression='bz2')

                    # Rebuild the time axis from the first and last year only,
                    # so every model is aligned on 1 January timestamps before
                    # the merge.
                    # assumes ds holds one row per consecutive year - TODO confirm
                    years = ds.datetimes.dt.year.values
                    dv = pd.DataFrame({
                        'datetimes': annual_index(years[0], years[-1]),
                        model: ds[model].values,
                    })

                    # Left merge keeps the full year_start..year_end axis;
                    # years a model does not cover are left as NaN.
                    df = df.merge(dv, how='left', on='datetimes')

                #--------------------------------------------------------------
                # SAVE: dataframe for each variable per projection for all models
                #--------------------------------------------------------------
                # A file is written for every combination, including those
                # with no matching input (datetimes column only).

                outfile = '_'.join([str(variable), threshold, str(projection), str(city)]) + '.pkl'
                df.to_pickle(outfile, compression='bz2')

#------------------------------------------------------------------------------
print('** END')