-
Notifications
You must be signed in to change notification settings - Fork 1
/
analysis_utils.py
80 lines (69 loc) · 2.65 KB
/
analysis_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
utility methods for analyzing data generated by this model
"""
import altair as alt
import polars as pl
from simulatingrisk.hawkdovemulti.model import RiskState
def groupby_population_risk_category(df):
    """Summarize model runs by their final population risk category.

    Takes a polars dataframe of hawk/dove multi model data that has a
    ``population_risk_category`` column, counts the rows in each
    category, renames the grouping column to ``risk_category``, sorts by
    it, and adds a ``type`` column holding the label returned by
    ``RiskState.category`` for each numeric category.

    Returns the grouped, sorted, and labeled polars dataframe.
    """
    # currently written for polars dataframes only.
    # One row per category with the number of runs that ended up in it.
    # NOTE(review): GroupBy.count() is deprecated in newer polars releases
    # in favor of len(); confirm against the pinned polars version before
    # switching, since the output column name differs.
    run_counts = (
        df.group_by("population_risk_category")
        .count()
        .rename({"population_risk_category": "risk_category"})
        .sort("risk_category")
    )

    # readable group labels for the numeric categories, computed from the
    # already-sorted category column so the rows line up
    type_labels = pl.Series(
        name="type",
        values=run_counts["risk_category"].map_elements(RiskState.category),
    )
    return run_counts.with_columns(type_labels)
def graph_population_risk_category(poprisk_grouped):
    """Build an altair bar chart of run counts per population risk category.

    Expects a dataframe as produced by
    :meth:`groupby_population_risk_category`, i.e. with ``risk_category``,
    ``count``, and ``type`` columns. Bars are positioned by risk category,
    sized by the number of runs, and colored by the larger ``type`` group.

    Returns the altair chart.
    """
    # x axis: fixed 1-13 domain with one tick per category (13 categories)
    category_channel = alt.X(
        "risk_category",
        title="risk category",
        axis=alt.Axis(tickCount=13),
        scale=alt.Scale(domain=[1, 13]),
    )
    # y axis: how many runs ended in each category
    run_count_channel = alt.Y("count", title="Number of runs")
    # color: the labeled group each category belongs to
    type_channel = alt.Color("type", title="type")

    bars = alt.Chart(poprisk_grouped).mark_bar(width=15)
    chart = bars.encode(
        x=category_channel,
        y=run_count_channel,
        color=type_channel,
    )
    return chart.properties(
        title="Distribution of runs by final population risk category"
    )
def grouped_risk_totals(df):
    """Add per-group agent totals to a polars dataframe of model data.

    Given a polars dataframe of hawk/dove multi model data with columns
    ``total_r0`` through ``total_r9``, returns the dataframe with three
    added columns that sum those totals by group of risk levels:
    ``risk_inclined``, ``risk_moderate``, and ``risk_avoidant``.
    """
    # NOTE: groupings are based on the risk level groupings used in the
    # model method for calculating population risk category

    # risk inclined: levels 0, 1, 2
    inclined = (
        pl.col("total_r0") + pl.col("total_r1") + pl.col("total_r2")
    ).alias("risk_inclined")

    # risk moderate: levels 3, 4, 5, 6
    moderate = (
        pl.col("total_r3")
        + pl.col("total_r4")
        + pl.col("total_r5")
        + pl.col("total_r6")
    ).alias("risk_moderate")

    # risk avoidant: levels 7, 8, 9
    avoidant = (
        pl.col("total_r7") + pl.col("total_r8") + pl.col("total_r9")
    ).alias("risk_avoidant")

    return df.with_columns(inclined, moderate, avoidant)