# plot.py

import dash
from dash import dcc, html, Input, Output, ctx, dash_table
import plotly.express as px
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import os

# Load the forecast dataset and normalise the columns the callbacks rely on.
file_path = '/Users/wexu/Downloads/CODE/data_forecast.csv'
data = pd.read_csv(file_path)
# Reduce 'Year' to its integer calendar year, then order rows chronologically.
# NOTE(review): pd.to_datetime reads plain integers as nanosecond offsets from
# the epoch, so this presumably expects date-like strings in the CSV - confirm.
data = data.assign(Year=pd.to_datetime(data['Year']).dt.year).sort_values('Year')
# Replace the text flag with a boolean: True only where the cell is "Yes".
data['Is_Forecast'] = data['Is_Forecast'].eq("Yes")

# Initialize the app.
# Callback exceptions are suppressed because the callbacks in this file target
# components that are only created when render_content builds a tab.
app = dash.Dash(__name__, suppress_callback_exceptions=True)

# Static shell: the tab bar plus an empty container that render_content fills.
app.layout = html.Div([
    dcc.Tabs(
        id="tabs",
        value='tab-1',
        children=[
            dcc.Tab(label=tab_label, value=tab_value)
            for tab_label, tab_value in (
                ('Data Visualization', 'tab-1'),
                ('Statistical Summary', 'tab-2'),
                ('Error Metrics', 'tab-3'),
            )
        ],
    ),
    html.Div(id='tabs-content'),
])

def _country_dropdown_options():
    """Return one dropdown option per distinct value in ``data['Country']``."""
    return [{'label': name, 'value': name} for name in data['Country'].unique()]


def _category_dropdown_options():
    """Return an option for each column of ``data`` except Year, Is_Forecast and Country."""
    skipped = ['Year', 'Is_Forecast', 'Country']
    return [{'label': column, 'value': column} for column in data.columns if column not in skipped]


def _build_visualization_tab():
    """Assemble the 'Data Visualization' tab: selectors, chart, year slider and download widgets."""
    years = data['Year']
    first_year, last_year = years.min(), years.max()

    selectors = html.Div([
        dcc.Dropdown(
            id='country-dropdown',
            options=_country_dropdown_options(),
            value=['United States'],
            multi=True
        ),
        dcc.Dropdown(
            id='category-dropdown',
            options=_category_dropdown_options(),
            value='Economic Quality'
        ),
    ], style={'width': '100%', 'display': 'inline-block'})

    chart = dcc.Graph(id='time-series-chart')

    # The slider starts out spanning every year present in the data, with one
    # mark per distinct year.
    year_slider = dcc.RangeSlider(
        id='year-slider',
        min=first_year,
        max=last_year,
        value=[first_year, last_year],
        marks={str(year): str(year) for year in years.unique()},
        step=None
    )

    return html.Div([
        selectors,
        chart,
        year_slider,
        html.Button("Download CSV", id="btn_csv"),
        dcc.Download(id="download-dataframe-csv"),
    ])


def _build_summary_tab():
    """Assemble the 'Statistical Summary' tab: heading, country selector and table."""
    return html.Div([
        html.H4('Statistical Summary'),
        dcc.Dropdown(
            id='country-summary-dropdown',
            options=_country_dropdown_options(),
            value='United States',
            clearable=False
        ),
        dash_table.DataTable(id='summary-table', style_table={'overflowY': 'auto'}),
    ])


def _build_error_metrics_tab():
    """Assemble the 'Error Metrics' tab: heading, country/category selectors and table."""
    return html.Div([
        html.H4('Error Metrics'),
        dcc.Dropdown(
            id='country-error-dropdown',
            options=_country_dropdown_options(),
            value='United States',
            clearable=False
        ),
        dcc.Dropdown(
            id='category-error-dropdown',
            options=_category_dropdown_options(),
            value='Economic Quality'
        ),
        dash_table.DataTable(id='error-metrics-table', style_table={'overflowY': 'auto'}),
    ])


@app.callback(Output('tabs-content', 'children'),
              Input('tabs', 'value'))
def render_content(tab):
    """Return the layout for the selected tab.

    Args:
        tab: Value of the active tab ('tab-1', 'tab-2' or 'tab-3').

    Returns:
        The ``html.Div`` for the matching tab, or ``None`` when ``tab`` is
        not one of the three known values.
    """
    if tab == 'tab-1':
        return _build_visualization_tab()
    if tab == 'tab-2':
        return _build_summary_tab()
    if tab == 'tab-3':
        return _build_error_metrics_tab()
    return None

@app.callback(
    Output('time-series-chart', 'figure'),
    [Input('country-dropdown', 'value'),
     Input('category-dropdown', 'value'),
     Input('year-slider', 'value')]
)
def update_graph(selected_countries, selected_category, year_range):
    """Redraw the time-series chart for the current selections.

    Args:
        selected_countries: Countries chosen in the multi-select dropdown.
            ``None`` is treated as "nothing selected" and a bare string as a
            single country.
        selected_category: Column of ``data`` plotted on the y axis.
        year_range: ``[first_year, last_year]`` from the slider; both ends
            are inclusive.

    Returns:
        A Plotly figure with one solid line per selected country (rows where
        ``Is_Forecast`` is false) plus one dotted trace per country that has
        forecast rows in range, or ``dash.no_update`` when no category is
        selected.
    """
    # The category dropdown is clearable, so its value can be None. There is
    # no column to plot in that case; leave the current chart untouched
    # instead of failing on a missing column.
    if not selected_category:
        return dash.no_update

    # A cleared multi-select may deliver None rather than an empty list, which
    # would break both isin() and the loop below.
    if selected_countries is None:
        selected_countries = []
    elif isinstance(selected_countries, str):
        selected_countries = [selected_countries]

    first_year, last_year = year_range
    dff = data[(data['Year'] >= first_year) & (data['Year'] <= last_year)]
    is_forecast = dff['Is_Forecast']  # boolean column built at load time
    actual_data = dff[~is_forecast]
    forecast_data = dff[is_forecast]

    fig = px.line(actual_data[actual_data['Country'].isin(selected_countries)],
                  x='Year', y=selected_category, color='Country', markers=True,
                  labels={selected_category: f'{selected_category} Ranking'},
                  title=f'Time Series of {selected_category}')

    # Forecast rows are drawn as separate dotted traces so they are visually
    # distinct from the observed series.
    for country in selected_countries:
        country_forecast_data = forecast_data[forecast_data['Country'] == country]
        if country_forecast_data.empty:
            continue
        fig.add_scatter(x=country_forecast_data['Year'], y=country_forecast_data[selected_category],
                        mode='lines+markers', name=f'{country} (Forecast)', line=dict(dash='dot'),
                        hoverinfo='name+y+x')

    fig.update_layout(hovermode='x unified')
    return fig

@app.callback(
    [Output('error-metrics-table', 'data'), Output('error-metrics-table', 'columns')],
    [Input('country-error-dropdown', 'value'), Input('category-error-dropdown', 'value')]
)
def update_error_metrics(selected_country, selected_category):
    """Compute MSE, RMSE and MAE between actual and forecast values.

    Actual and forecast rows for the chosen country are paired on ``Year``;
    only years present in both sets contribute to the metrics.

    Args:
        selected_country: Country whose rows are compared.
        selected_category: Column of ``data`` holding the values to compare.

    Returns:
        A ``(rows, columns)`` pair for the DataTable. ``rows`` is empty when
        either selection is missing or when no year has both an actual and a
        forecast value.
    """
    columns = [{"name": "Metric", "id": "Metric"}, {"name": "Value", "id": "Value"}]

    # The category dropdown is clearable, so None can arrive here; selecting a
    # None column below would raise KeyError.
    if not selected_country or not selected_category:
        return [], columns

    country_rows = data[data['Country'] == selected_country]
    is_forecast = country_rows['Is_Forecast']  # boolean column built at load time
    actual_data = country_rows.loc[~is_forecast, ['Year', selected_category]]
    forecast_data = country_rows.loc[is_forecast, ['Year', selected_category]]
    if actual_data.empty or forecast_data.empty:
        return [], columns

    merged_data = pd.merge(
        actual_data,
        forecast_data,
        on='Year',
        suffixes=('_actual', '_forecast')
    )
    # The sklearn metric functions reject NaN input, so keep only the years
    # where both values are present.
    merged_data = merged_data.dropna()
    if merged_data.empty:
        return [], columns

    actual_values = merged_data[selected_category + '_actual']
    forecast_values = merged_data[selected_category + '_forecast']
    mse = mean_squared_error(actual_values, forecast_values)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual_values, forecast_values)

    rows = [
        {"Metric": "MSE", "Value": float(mse)},
        {"Metric": "RMSE", "Value": float(rmse)},
        {"Metric": "MAE", "Value": float(mae)},
    ]
    return rows, columns

@app.callback(
    [Output('summary-table', 'data'), Output('summary-table', 'columns')],
    [Input('country-summary-dropdown', 'value')]
)
def update_statistical_summary(selected_country):
    """Build the descriptive-statistics table for one country.

    Args:
        selected_country: Country whose rows of ``data`` are summarised.

    Returns:
        A ``(rows, columns)`` pair for the DataTable: the output of
        ``DataFrame.describe()`` over the country's numeric columns, rounded
        to two decimals, with the statistic name in an ``index`` column.
    """
    country_rows = data.loc[data['Country'] == selected_country]
    numeric_only = country_rows.select_dtypes(include=[np.number])
    # reset_index turns the statistic labels into a regular column so they
    # appear in the table.
    stats = numeric_only.describe().reset_index().round(2)
    table_columns = [{"name": name, "id": name} for name in stats.columns]
    return stats.to_dict('records'), table_columns

if __name__ == '__main__':
    # Start the development server with debug mode enabled.
    # Newer Dash releases replaced run_server with app.run, so prefer app.run
    # and only fall back to run_server on releases that do not provide it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)