From 46394b44a0520fd555172131d63ecb88b5c6c96f Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 30 Jul 2024 14:02:59 -0400 Subject: [PATCH] Analysis of revised convergence logic and simulation run length --- .../hdm_convergence_runlength.ipynb | 849 ++++++++++++++++++ 1 file changed, 849 insertions(+) create mode 100644 notebooks/new_convergence/hdm_convergence_runlength.ipynb diff --git a/notebooks/new_convergence/hdm_convergence_runlength.ipynb b/notebooks/new_convergence/hdm_convergence_runlength.ipynb new file mode 100644 index 0000000..0701bc6 --- /dev/null +++ b/notebooks/new_convergence/hdm_convergence_runlength.ipynb @@ -0,0 +1,849 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ea3feea1-3ed9-4d7f-9f90-cb6ae56e39da", + "metadata": {}, + "source": [ + "# run length and adopt vs. average risk adjustment\n", + "\n", + "- how long does hawk/dove multi risk attitude take to converge with the new logic?\n", + " - how many do not converge?\n", + "- what difference does it make if we use adopt or average risk adjustment strategy?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "4edfc5bc-ab4c-435b-8c0c-593d560a1445", + "metadata": {}, + "outputs": [], + "source": [ + "import polars as pl\n", + "\n", + "df = pl.scan_csv(\"../../data/hawkdovemulti/riskadjust/dist-uniform/*.csv\").collect()" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "cc794dde-f8d5-4bc6-9eb5-059741196600", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Analyzing 1000 runs\n" + ] + } + ], + "source": [ + "total_runs = len(df)\n", + "\n", + "print(f\"Analyzing {total_runs} runs\")" + ] + }, + { + "cell_type": "markdown", + "id": "bddba8b7-1194-4036-8e6b-38fa57bb6d00", + "metadata": {}, + "source": [ + "## simulation run length\n", + "\n", + "They either finished very quickly (~50 steps) or never finished." + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "eaa053a9-2567-41e7-9c8a-0bc76c4c05c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (9, 2)
statisticvalue
strf64
"count"1000.0
"null_count"0.0
"mean"2904.7
"std"2131.600767
"min"60.0
"25%"750.0
"50%"3500.0
"75%"5500.0
"max"5500.0
" + ], + "text/plain": [ + "shape: (9, 2)\n", + "┌────────────┬─────────────┐\n", + "│ statistic ┆ value │\n", + "│ --- ┆ --- │\n", + "│ str ┆ f64 │\n", + "╞════════════╪═════════════╡\n", + "│ count ┆ 1000.0 │\n", + "│ null_count ┆ 0.0 │\n", + "│ mean ┆ 2904.7 │\n", + "│ std ┆ 2131.600767 │\n", + "│ min ┆ 60.0 │\n", + "│ 25% ┆ 750.0 │\n", + "│ 50% ┆ 3500.0 │\n", + "│ 75% ┆ 5500.0 │\n", + "│ max ┆ 5500.0 │\n", + "└────────────┴─────────────┘" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Step\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "31aa7cd7-a9ab-408a-9266-218238c79e01", + "metadata": {}, + "outputs": [ + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Histogram [Step] (Step_count)" + ] + }, + "execution_count": 146, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p2751" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"Step\"].plot.hist()" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "id": "97a6df2a-d8dc-4f61-b356-a0e52571d93f", + "metadata": {}, + "outputs": [], + "source": [ + "# what about those that converged?\n", + "\n", + "converged = df.filter(pl.col(\"status\") == \"converged\")" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "9371939e-2223-40b3-9989-4172a69741f6", + "metadata": {}, + "outputs": [ + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":Histogram [Step] (Step_count)" + ] + }, + "execution_count": 148, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p2817" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "converged[\"Step\"].plot.hist()" + ] + }, + { + "cell_type": "markdown", + "id": "653fb461-d563-40ca-99f4-fc65e3d0a259", + "metadata": {}, + "source": [ + "### what % converged?" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "id": "2a819ce2-eb8b-40bf-8a0c-7509989b4401", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (2, 2)
statuscount
stru32
"running"858
"converged"142
" + ], + "text/plain": [ + "shape: (2, 2)\n", + "┌───────────┬───────┐\n", + "│ status ┆ count │\n", + "│ --- ┆ --- │\n", + "│ str ┆ u32 │\n", + "╞═══════════╪═══════╡\n", + "│ running ┆ 858 │\n", + "│ converged ┆ 142 │\n", + "└───────────┴───────┘" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "status_totals = df[\"status\"].value_counts()\n", + "status_totals" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "id": "0c121e06-1d76-4a26-8bd4-50ede7c558e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "142 runs out of 1000; 14.20% complete\n" + ] + } + ], + "source": [ + "converg_total = status_totals.filter(status_totals[\"status\"] == \"converged\")[\"count\"][0]\n", + "\n", + "print(f\"{converg_total} runs out of {total_runs}; {converg_total/total_runs*100:.2f}% complete\")" + ] + }, + { + "cell_type": "markdown", + "id": "fa40e5ee-55d6-44c1-9386-a7a72417b0d7", + "metadata": {}, + "source": [ + "### risk adjustment (adopt / average)\n", + "\n", + "hypothesis: adjustment strategy does not have a significant impact on the final result, only affects how long it takes to get there" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "1dfbf4b2-1525-4f00-9a83-93a49b72e085", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "adopt: 140 rows\n", + "average: 2 rows\n" + ] + } + ], + "source": [ + "from scipy import stats\n", + "\n", + "\n", + "df_riskadjust = converged.clone()\n", + "\n", + "# TODO: make reusable functions for annotating data\n", + "\n", + "for i in range(0, 10):\n", + " # calculate new series based on existing \n", + " pct_risk_category = df_riskadjust.select(pl.col(f\"total_r{i}\") / pl.col(\"total_agents\"))\n", + " # add new column to the dataframe\n", + " df_riskadjust = df_riskadjust.with_columns(pl.Series(name=f\"pct_r{i}\", 
values=pct_risk_category))\n", + "\n", + "df_riskadjust = df_riskadjust.with_columns(\n", + " pl.Series('pct_risk_inclined', values=df_riskadjust.select((pl.col(\"total_r0\") + pl.col(\"total_r1\") + pl.col(\"total_r2\")) / pl.col(\"total_agents\"))),\n", + " pl.Series('pct_risk_moderate', values=df_riskadjust.select((pl.col(\"total_r3\") + pl.col(\"total_r4\") + pl.col(\"total_r5\") + pl.col(\"total_r6\")) / pl.col(\"total_agents\"))),\n", + " pl.Series('pct_risk_avoidant', values=df_riskadjust.select((pl.col(\"total_r7\") + pl.col(\"total_r8\") + pl.col(\"total_r9\")) / pl.col(\"total_agents\")))\n", + ")\n", + "\n", + "df_riskadjust = df_riskadjust.with_columns(pl.Series('risk_attitude_mean', values=df_riskadjust.select(\n", + " (pl.col(\"total_r1\") + pl.col(\"total_r2\")*2 + pl.col(\"total_r3\")*3 + pl.col(\"total_r4\")*4 + pl.col(\"total_r5\")*5 + pl.col(\"total_r6\")*6 + pl.col(\"total_r7\")*7 + pl.col(\"total_r8\")*8 + pl.col(\"total_r9\")*9) \n", + " / pl.col(\"total_agents\"))))\n", + "\n", + "\n", + "df_adopt = df_riskadjust.filter((pl.col(\"risk_adjustment\") == \"adopt\"))\n", + "df_avg = df_riskadjust.filter((pl.col(\"risk_adjustment\") == \"average\"))\n", + "\n", + "print(f\"adopt: {len(df_adopt):,} rows\")\n", + "print(f\"average: {len(df_avg):,} rows\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "484c7785-b067-4253-8a33-2c66c86a05ff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "TtestResult(statistic=array([0.58333333]), pvalue=array([0.66381736]), df=array([1]))" + ] + }, + "execution_count": 152, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "maxlen = min(len(df_adopt), len(df_avg))\n", + "\n", + "stats.ttest_rel(df_adopt.select(\"pct_risk_inclined\")[:maxlen], df_avg.select(\"pct_risk_inclined\")[:maxlen])" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "3f420203-f683-40bf-8ffa-9767d6cbdc09", + "metadata": {}, + "outputs": [ + { 
+ "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":BoxWhisker [risk_adjustment] (Step)" + ] + }, + "execution_count": 153, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p2883" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "df_riskadjust.plot.box(\"Step\", by='risk_adjustment')" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "2872f0c6-85aa-476a-8297-7a664266c42b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.HConcatChart(...)" + ] + }, + "execution_count": 154, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from simulatingrisk.hawkdovemulti import analysis_utils\n", + "import importlib\n", + "importlib.reload(analysis_utils)\n", + "\n", + "# df_adopt, df_average\n", + "\n", + "adopt_chart = analysis_utils.graph_population_risk_category(\n", + " analysis_utils.groupby_population_risk_category(df_adopt)\n", + ").properties(title=\"risk adjust: adopt\")\n", + " \n", + "average_chart = analysis_utils.graph_population_risk_category(\n", + " analysis_utils.groupby_population_risk_category(df_avg)\n", + ").properties(title=\"risk adjust: average\")\n", + "\n", + "(adopt_chart | average_chart).properties(title=\"distribution of population category by run\").resolve_scale(y='shared')" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "2242207f-0d44-492c-83ba-3c4f6beb892b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(858, 23)" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_converged = df.filter(pl.col(\"status\") == \"running\")\n", + "not_converged.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "c7937841-63b8-484d-9061-da3462d49c18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "shape: (9, 2)
statisticvalue
strf64
"count"858.0
"null_count"0.0
"mean"10.850816
"std"7.569892
"min"1.0
"25%"4.0
"50%"10.0
"75%"16.0
"max"35.0
" + ], + "text/plain": [ + "shape: (9, 2)\n", + "┌────────────┬───────────┐\n", + "│ statistic ┆ value │\n", + "│ --- ┆ --- │\n", + "│ str ┆ f64 │\n", + "╞════════════╪═══════════╡\n", + "│ count ┆ 858.0 │\n", + "│ null_count ┆ 0.0 │\n", + "│ mean ┆ 10.850816 │\n", + "│ std ┆ 7.569892 │\n", + "│ min ┆ 1.0 │\n", + "│ 25% ┆ 4.0 │\n", + "│ 50% ┆ 10.0 │\n", + "│ 75% ┆ 16.0 │\n", + "│ max ┆ 35.0 │\n", + "└────────────┴───────────┘" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_converged[\"num_agents_risk_changed\"].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "9dff6e0a-54f8-460a-bb7e-859d67017e04", + "metadata": {}, + "outputs": [ + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
\n", + "
\n", + "
\n", + "" + ], + "text/plain": [ + ":BoxWhisker (num_agents_risk_changed)" + ] + }, + "execution_count": 160, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p3032" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "not_converged.plot.box(\"num_agents_risk_changed\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}