diff --git a/.gitattributes b/.gitattributes index 8df1b9e..b043de4 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,27 +1,27 @@ # Data -*.gdx filter=lfs diff=lfs merge=lfs -text -*.g00 filter=lfs diff=lfs merge=lfs -text -*.feather filter=lfs diff=lfs merge=lfs -text -*.mdb filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.gtb filter=lfs diff=lfs merge=lfs -text +*.gdx filter=lfs -diff merge=lfs -text +*.g00 filter=lfs -diff merge=lfs -text +*.feather filter=lfs -diff merge=lfs -text +*.mdb filter=lfs -diff merge=lfs -text +*.pkl filter=lfs -diff merge=lfs -text +*.gtb filter=lfs -diff merge=lfs -text # Images -*.png filter=lfs diff=lfs merge=lfs -text -*.jpg filter=lfs diff=lfs merge=lfs -text -*.svg filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs -diff merge=lfs -text +*.jpg filter=lfs -diff merge=lfs -text +*.svg filter=lfs -diff merge=lfs -text -*.pdf filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs -diff merge=lfs -text # MS Office -*.xls filter=lfs diff=lfs merge=lfs -text -*.xlsx filter=lfs diff=lfs merge=lfs -text -*.ppt filter=lfs diff=lfs merge=lfs -text -*.pptx filter=lfs diff=lfs merge=lfs -text -*.doc filter=lfs diff=lfs merge=lfs -text -*.docx filter=lfs diff=lfs merge=lfs -text +*.xls filter=lfs -diff merge=lfs -text +*.xlsx filter=lfs -diff merge=lfs -text +*.ppt filter=lfs -diff merge=lfs -text +*.pptx filter=lfs -diff merge=lfs -text +*.doc filter=lfs -diff merge=lfs -text +*.docx filter=lfs -diff merge=lfs -text # Executables -*.exe filter=lfs diff=lfs merge=lfs -text -*.dll filter=lfs diff=lfs merge=lfs -text -*.pyc filter=lfs diff=lfs merge=lfs -text +*.exe filter=lfs -diff merge=lfs -text +*.dll filter=lfs -diff merge=lfs -text +*.pyc filter=lfs -diff merge=lfs -text diff --git a/.gitignore b/.gitignore index 76ed857..b881319 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,12 @@ *.sublime-workspace *.pyc *.dat -*.gdx -!Model/Gdx/stiliseret_grundforloeb.gdx 
-!Model/Gdx/stiliseret_grundforloeb_nominal.gdx
+*.gdx
+!Model/Gdx/aldersprofiler.gdx
+!Model/Gdx/ARIMA_forecasts.gdx
+!Model/Gdx/previous_static_calibration.gdx
+!Model/Gdx/previous_deep_calibration.gdx
+!Model/Gdx/previous_smooth_calibration.gdx
 !Data/*.gdx
-
-!Model/Savepoints/model.g00
-!Model/Savepoints/model.pkl
\ No newline at end of file
+!Data/*/*.gdx
\ No newline at end of file
diff --git a/Analysis/Shocks_MPC/Gdx/.gitkeep b/Analysis/Baseline/Output/.gitignore
similarity index 100%
rename from Analysis/Shocks_MPC/Gdx/.gitkeep
rename to Analysis/Baseline/Output/.gitignore
diff --git a/Analysis/Baseline/baseline.py b/Analysis/Baseline/baseline.py
new file mode 100644
index 0000000..c089101
--- /dev/null
+++ b/Analysis/Baseline/baseline.py
@@ -0,0 +1,130 @@
+# coding: utf-8
+"""Plot baseline variables from one or more calibration databases.
+
+Loads the GDX databases listed in ``database_names``, evaluates every variable
+getter from ``variables_to_plot`` against them and writes the resulting charts
+(one facet per variable) to a single HTML report in ``output_folder``.
+"""
+import os
+import dreamtools as dt
+import pandas as pd
+from math import ceil
+
+# Relative paths below are resolved from this script's folder. abspath() guards
+# against __file__ being a bare file name, where dirname() would return "" and
+# os.chdir("") would raise.
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+
+t1 = 2021  # Year marked with a dashed vertical line in every plot
+dt.time(1983, 2060)
+
+operator = ""  # Set to "p" to see year on year percentage changes
+
+output_folder = r"Output"
+fname = "baseline"
+output_extension = ".html"
+
+DA = True  # Should labels be in Danish or English?
+
+output_path = f"{output_folder}/{fname}{output_extension}"
+
+# If the output file already exists, delete it (to prevent accidentally viewing
+# an old version in case of an error)
+if os.path.exists(output_path):
+    os.remove(output_path)
+
+# Databases to compare; the first one becomes dt.REFERENCE_DATABASE. The file
+# name (without extension) doubles as the legend label, so paths and labels
+# cannot get out of sync when entries are commented in or out.
+database_names = [
+    "previous_smooth_calibration",
+    "smooth_calibration",
+    # "calibration_2018",
+    # "calibration_2019",
+    # "calibration_2020",
+    # "calibration_2021",
+]
+# Forward slashes are accepted on Windows as well as on other platforms
+database_paths = [f"../../Model/Gdx/{name}.gdx" for name in database_names]
+database_labels = list(database_names)
+databases = [dt.Gdx(path) for path in database_paths]
+dt.REFERENCE_DATABASE = databases[0]
+
+# NOTE(review): kept below the dreamtools setup above; confirm whether
+# variables_to_plot depends on it before moving this import to the top.
+from variables_to_plot import variable_labels_DK, variable_labels_EN, get_variable_functions
+variable_labels = variable_labels_DK if DA else variable_labels_EN
+
+# Collect all data to be plotted in a single dataframe
+dfs = {}
+for getter, variable_label in zip(get_variable_functions, variable_labels):
+    try:
+        df = dt.DataFrame(databases, operator, getter, names=database_labels)
+        df = df.reset_index().melt(value_vars=database_labels, id_vars="t", var_name="database")
+        dfs[variable_label] = df
+    except Exception as e:
+        # Best effort: report the variable that failed and plot the rest
+        print(f"Exception: {e} - {variable_label}")
+
+if not dfs:
+    raise SystemExit("No variables could be read - nothing to plot")
+
+df = pd.concat(dfs, names=["figure_label"]).reset_index(level=0)
+df = pd.DataFrame(df)  # Convert from dt.DataFrame to pd.DataFrame
+
+# Plot layout settings
+height_in_cm = 26
+width_in_cm = 18
+pixels_pr_cm = 96 / 2.54
+columns_pr_page = 3
+rows_pr_page = 6
+plots_pr_page = columns_pr_page * rows_pr_page
+
+
+def divide_chunks(l, n):
+    """Divide a list into chunks of size n"""
+    for i in range(0, len(l), n):
+        yield l[i:i + n]
+
+
+# Split the variables into chunks that fit on a page. Only variables that were
+# actually read are laid out, so every page but the last is full and no page
+# ends up without data.
+variable_labels_by_page = list(divide_chunks(list(dfs), plots_pr_page))
+
+# Create the plots
+figures = []
+for page_labels in variable_labels_by_page:
+    fig = df[df.figure_label.isin(page_labels)].plot(
+        x="t",
+        y="value",
+        facet_col="figure_label",
+        color="database",
+        facet_col_wrap=columns_pr_page,
+        facet_row_spacing=1.5 / height_in_cm,
+        facet_col_spacing=1.5 / width_in_cm,
+        labels={
+            "t": "År" if DA else "Year",
+            "value": "",
+        },
+    )
+
+    fig.update_layout(
+        legend_title_text="",
+        height=height_in_cm * pixels_pr_cm,
+        width=width_in_cm * pixels_pr_cm,
+        legend_y=-1.5 / height_in_cm,
+        margin_t=2.0 * pixels_pr_cm,
+        plot_bgcolor="white",
+    )
+
+    fig.update_traces(line_width=2)
+    fig.update_xaxes(ticklen=4, gridcolor=dt.dream_colors_rgb["Light gray"])
+    fig.update_yaxes(ticklen=4, matches=None, showticklabels=True, gridcolor=dt.dream_colors_rgb["Light gray"])
+    # Keep only the text after the last "=" in each facet title
+    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
+    fig.add_vline(x=t1, line_width=1, line_dash="dash", line_color="black")
+    figures.append(fig)
+
+# Export html report to the same path that was cleared above
+dt.figures_to_html(figures, output_path)
diff --git a/Analysis/Baseline/compare_ARIMA_and_IO.py b/Analysis/Baseline/compare_ARIMA_and_IO.py
new file mode 100644
index 0000000..7a37475
--- /dev/null
+++ b/Analysis/Baseline/compare_ARIMA_and_IO.py
@@ -0,0 +1,180 @@
+""" The following file does two things;
+    1. compares changes in IO cells between two years and two baseline .gdx files
+    2. compares ARIMA forecasts with drift or trend reversion between two forecasts (e.g.
+       useful to check how ARIMA's change when updating data).
+
+The ARIMA plots are saved page by page in Output/ and then merged into
+Output/ARIMA_comparison.pdf, after which the single pages are deleted. """
+
+import os
+import dreamtools as dt
+import pandas as pd
+import numpy as np
+from PyPDF2 import PdfMerger
+from matplotlib import pyplot as plt
+
+# Run from this script's folder so the relative paths below resolve on any
+# machine and checkout location.
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
+plt.rcParams['axes.grid'] = True
+plt.rc('lines', linewidth=2.0)
+
+# IO comparison years
+new_year = 2018
+ref_year = 2017
+
+# 1. compare IO cells
+r = dt.Gdx("..//..//Model/Gdx/previous_smooth_calibration.gdx") # reference
+n = dt.Gdx("..//..//Model/Gdx/smooth_calibration.gdx") # new
+
+qIO_ref = r.qIO.loc[:,:,ref_year]
+qIO_new = n.qIO.loc[:,:,new_year]
+
+# cells that are zero in the reference year but non-zero in the new year
+qIO_changed_cells = qIO_new[qIO_ref == 0] != 0
+with pd.option_context('display.max_rows', None,
+                       'display.max_columns', None,
+                       'display.precision', 3,
+                       ):
+    print(qIO_changed_cells[qIO_changed_cells]) # check if there is any change at all
+
+# pct. change of the cells that are non-zero in the reference year; every mask
+# is built from ref_year so it follows the setting above
+qIO_ref_nonzero = qIO_ref != 0
+qIO_pct_change = (qIO_new[qIO_ref_nonzero] - qIO_ref[qIO_ref_nonzero]) / qIO_ref[qIO_ref_nonzero] * 100
+
+with pd.option_context('display.max_rows', None,
+                       'display.max_columns', None,
+                       'display.precision', 3,
+                       ):
+    print(qIO_pct_change)
+    # a bare expression shows nothing when run as a script, so print it
+    print("Max pct. change:", np.max(qIO_pct_change)) # check max pct. change
+
+# 2. compare ARIMA's
+ARIMA_input = dt.Gdx("..//..//Model/Gdx/ARIMA_forecast_input.gdx")
+r = dt.Gdx("..//..//Model/Gdx/previous_ARIMA_forecasts.gdx")
+n = dt.Gdx("..//..//Model/Gdx/ARIMA_forecasts.gdx")
+
+drift_year = 2020 # year to check drift/trend reversion from
+first_plot_year = 1983 # first year shown in the plots
+plots_per_page = 20 # plots per PDF page for the multi-index variables
+ncols = 4
+nrows = 6 # NOTE(review): a 4x6 grid fits 24 plots, but 20 are placed per page
+
+# input variables are looked up by their upper-cased name; built once instead
+# of per plot. setdefault keeps the first key if several differ only in case.
+input_key_by_upper = {}
+for input_key in ARIMA_input.keys():
+    input_key_by_upper.setdefault(input_key.upper(), input_key)
+
+page_pdfs = [] # PDF pages written by this run, in the order they are merged
+
+
+def ceil_div(a, b):
+    """Return a / b rounded up to the nearest integer (0 when a is 0)."""
+    return -(-a // b)
+
+
+def has_drift(series):
+    """Return True if any value from drift_year onwards differs from the drift_year value."""
+    return not (series.loc[drift_year] == series.loc[drift_year:]).all()
+
+
+def plot_forecasts(ax, title, varname, key=None):
+    """Plot the input data and the old and new forecast of one variable on ax.
+
+    key selects the set element(s) of a multi-index variable; None means the
+    variable has no sets.
+    """
+    history, old, new = ARIMA_input[input_key_by_upper[varname]], r[varname], n[varname]
+    if key is not None:
+        history, old, new = history[key], old[key], new[key]
+    ax.set_title(title, fontsize=14)
+    ax.plot(history.loc[first_plot_year:].replace(0, np.nan), color='black') # make 0 NA
+    ax.plot(old, label="Old forecast", color='red')
+    ax.plot(new, label="New forecast", color='blue')
+    ax.legend(loc='upper right')
+    ax.set_xlabel('Year')
+    ax.set_xlim(first_plot_year, new.index[-1])
+
+
+def save_page(fig, pdf_path):
+    """Save fig to pdf_path, remember the file for the merge step and close the figure."""
+    fig.tight_layout()
+    fig.savefig(pdf_path)
+    plt.close(fig) # pyplot keeps every figure open until it is closed explicitly
+    page_pdfs.append(pdf_path)
+
+
+def plot_pages(entries, pdf_stem):
+    """Plot entries with plots_per_page plots to a page, saved as <pdf_stem><page number>.pdf."""
+    for page in range(ceil_div(len(entries), plots_per_page)):
+        fig = plt.figure(figsize=(6*ncols,4*nrows),dpi=100)
+        page_entries = entries[page*plots_per_page:(page+1)*plots_per_page]
+        for j, (title, varname, key) in enumerate(page_entries):
+            plot_forecasts(fig.add_subplot(nrows,ncols,j+1), title, varname, key)
+        save_page(fig, pdf_stem + str(page) + '.pdf')
+
+
+# loop through multi-indeces etc. and collect (title, varname, key) for every
+# series with drift in the new or old forecast
+drift_single = [] # variables without sets
+drift_multiindex1 = [] # variables with one set
+drift_multiindex2 = [] # variables with two sets
+
+for i in r.keys():
+    if i == 'RVTAFGEU2VTAFG':
+        print('Exception for RVTAFGEU2VTAFG') # not callable
+        continue
+    if not isinstance(n[i].index, pd.MultiIndex):
+        if has_drift(n[i]) or has_drift(r[i]):
+            drift_single.append((i, i, None))
+    elif n[i].index.nlevels > 2:
+        # index.unique() keeps the order of the data, so the plot order is the
+        # same on every run (unlike iterating a set)
+        for j in n[i].droplevel(2).index.unique():
+            if has_drift(n[i][j[0]][j[1]]) or has_drift(r[i][j[0]][j[1]]):
+                drift_multiindex2.append((i+"["+str(j[0])+","+str(j[1])+"]", i, j))
+    else:
+        for j in n[i].droplevel(1).index.unique():
+            if has_drift(n[i][j]) or has_drift(r[i][j]):
+                drift_multiindex1.append((i+"["+str(j)+"]", i, j))
+
+# first single variables
+if drift_single: # an empty grid would have zero rows
+    rows_single = ceil_div(len(drift_single), ncols)
+    fig = plt.figure(figsize=(6*ncols,4*rows_single),dpi=100)
+    for i, (title, varname, key) in enumerate(drift_single):
+        plot_forecasts(fig.add_subplot(rows_single,ncols,i+1), title, varname, key)
+    save_page(fig, 'Output//ARIMA_drift_noset.pdf')
+
+# next multi-index variables, single
+plot_pages(drift_multiindex1, 'Output//ARIMA_drift_1set')
+
+# next multi-index variables, double
+plot_pages(drift_multiindex2, 'Output//ARIMA_drift_2sets')
+
+# combine PDFs
+merged_pdf = "Output//ARIMA_comparison.pdf"
+if os.path.isfile(merged_pdf):
+    os.remove(merged_pdf)
+
+if page_pdfs:
+    merger = PdfMerger()
+    try:
+        # paths instead of open() handles: the merger opens the files itself
+        # and close() releases them again before the pages are deleted
+        for pdf in page_pdfs:
+            merger.append(pdf)
+        with open(merged_pdf, "wb") as fout:
+            merger.write(fout)
+    finally:
+        merger.close()
+
+    # delete single pdf files (only the ones written above, so other PDFs in
+    # Output are left alone)
+    for pdf in page_pdfs:
+        os.remove(pdf)
+else:
+    print("No ARIMA forecasts with drift found - no PDF written")
diff --git a/Analysis/Baseline/variables_to_plot.py b/Analysis/Baseline/variables_to_plot.py
new file mode 100644
index 0000000..e132a5a
--- /dev/null
+++ b/Analysis/Baseline/variables_to_plot.py
@@ -0,0 +1,86 @@
+import dreamtools as dt
+
+"""
+List of tuples each containing information about a variable to plot
+(
+