-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathab-test.py
188 lines (156 loc) · 9.56 KB
/
ab-test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
import pandas as pd
import numpy as np
import sys
import glob
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import shapiro, levene, ttest_ind, mannwhitneyu, f_oneway
sys.path.append("pythonProject/Github/Me/ab-test/")
from helpers.pandas_options import set_pandas_options
set_pandas_options(width=500, precision=4)
# Tag each raw CSV with a "Version" column and save it again, so that the test groups stay distinguishable after the dataframes are merged.
def add_column_to_csv(data):
    """Tag one raw CSV with its test-group label and save an adjusted copy.

    The label is the text that follows the first "- " in the part of the file
    name before the first "," (for example, a file named
    "... - Connect, 5-29-2013.csv" yields "Connect"). The label is stored
    upper-cased in a new "Version" column and the frame is written to
    "<adj_datasets dir>/<label>.csv".

    Args:
        data: Path of the CSV file to read.

    Raises:
        ValueError: If no label can be derived from the file name.
    """
    output_dir = "pythonProject/Github/Me/ab-test/datasets/adj_datasets"
    os.makedirs(output_dir, exist_ok=True)

    # Parse the file name only, so that a "," or "- " occurring in a parent
    # directory cannot corrupt the label.
    name_parts = os.path.basename(data).split(",")[0].split("- ")
    if len(name_parts) < 2:
        raise ValueError(f"Cannot derive a version label from file name: {data!r}")
    version = name_parts[1]

    dataframe = pd.read_csv(data)
    dataframe["Version"] = version.upper()
    # index=False: do not persist the row index as a spurious unnamed column.
    dataframe.to_csv(output_dir + "/" + version + ".csv", index=False)
# Write an adjusted copy (with a "Version" column) of every raw CSV.
# NOTE(review): this pattern is rooted at "ab-test/", while the adjusted files
# are read back from "pythonProject/Github/Me/ab-test/" - confirm that both
# resolve from the same working directory.
raw_pattern = "ab-test/datasets" + "/*.csv"
for raw_csv in glob.glob(raw_pattern):
    add_column_to_csv(raw_csv)

# Load every adjusted CSV found by glob and stack them into one dataframe.
adjusted_pattern = "pythonProject/Github/Me/ab-test/datasets/adj_datasets" + "/*.csv"
frames = [pd.read_csv(adjusted_csv) for adjusted_csv in glob.glob(adjusted_pattern)]
df = pd.concat(frames, ignore_index=True)

# Keep only the columns used below; "No. clicks" is renamed to "Click".
df = df[["Name", "No. clicks", "Version"]]
df = df.rename(columns={"No. clicks": "Click"})
df.head()
# Name Click Version
# 0 FIND 502 CONNECT
# 1 s.q 357 CONNECT
# 2 lib.montana.edu/find/ 171 CONNECT
# 3 Montana State University Libraries - Home 83 CONNECT
# 4 Hours 74 CONNECT
# Function that totals the clicks on the key links of one version, so that the click shares of the versions can be compared.
def group_test_assessment(dataframe, group_name):
    """Total the clicks on the key links of one homepage version.

    Prints a banner naming the version ("INTERACT" is labelled as the control
    group, any other value as a test group). It then keeps the rows of that
    version whose "Name" is "FIND", "REQUEST" or the version's own name, and
    sums their "Click" values per name.

    Args:
        dataframe: Frame with "Name", "Click" and "Version" columns.
        group_name: "Version" value to summarise.

    Returns:
        A new DataFrame with the columns "Name" and "Click", one row per
        name that was found.
    """
    if group_name == "INTERACT":
        print("VERSION - INTERACT (CONTROL GROUP)")
    else:
        print(f"VERSION - {group_name} (TEST GROUP)")

    tracked_names = ["FIND", "REQUEST", group_name]
    selected = dataframe["Name"].isin(tracked_names) & (dataframe["Version"] == group_name)
    summary = dataframe[selected].groupby("Name")["Click"].sum().reset_index()
    print("#################################")
    return summary
# Build the click summary of every version, control group first.
df_interact, df_connect, df_learn, df_help, df_services = (
    group_test_assessment(df, version_name)
    for version_name in ("INTERACT", "CONNECT", "LEARN", "HELP", "SERVICES")
)
df_services.head()
# Name Click
# 0 FIND 397
# 1 REQUEST 57
# 2 SERVICES 45
# Pie charts of the click distribution in each version --> visualization/pie_charts.png
colors = ["slategray", "gray", "silver"]
fig, ax = plt.subplots(3, 3, figsize=(10, 10))

# One entry per chart:
# (grid cell, title, extra title options, summary frame, radius, wedge offsets)
pie_specs = [
    ((0, 1), "Homepage Click-through Rates - Interact (Control)",
     {"pad": 32, "fontstyle": 'italic', "fontsize": "12"}, df_interact, 1.6, (0, 0.2, 0)),
    ((1, 0), "Click-through Rates - Connect",
     {"fontstyle": 'italic'}, df_connect, 1, (0.2, 0, 0)),
    ((1, 2), "Click-through Rates - Learn",
     {"pad": -16, "fontstyle": 'italic'}, df_learn, 1, (0, 0.2, 0)),
    ((2, 0), "Click-through Rates - Help",
     {"fontstyle": 'italic'}, df_help, 1, (0, 0.2, 0)),
    ((2, 2), "Click-through Rates - Services",
     {"fontstyle": 'italic'}, df_services, 1, (0, 0, 0.2)),
]
for cell, title, title_options, summary, pie_radius, offsets in pie_specs:
    chart_axis = ax[cell]
    # The title is always rendered at size 10, whatever the options request.
    chart_axis.set_title(title, **title_options).set_fontsize(10)
    chart_axis.pie(
        summary["Click"].values,
        labels=summary["Name"].values,
        autopct='%.1f%%',
        wedgeprops={'edgecolor': "white", 'linewidth': 1},
        textprops={'fontsize': 8},
        radius=pie_radius,
        colors=colors,
        explode=offsets,
    )

# Hide the grid cells that hold no chart.
for empty_cell in [(0, 0), (0, 2), (2, 1), (1, 1)]:
    ax[empty_cell].axis("off")
plt.show()
# Defining AB Test Function
def ab_test(dataframe, control_group, test_group, alpha=0.05):
    """Compare the clicks of a control and a test version with a two-sample test.

    Main hypothesis:
        H0: There is no statistically significant difference between the two versions.
        H1: There is a statistically significant difference between the two versions.

    Procedure:
        1. Shapiro-Wilk test on each group (H0: the sample is normally
           distributed).
        2. If normality is rejected for neither group, Levene's test
           (H0: the variances are equal) selects between the t-test with
           pooled variance and the t-test with unequal variances.
        3. Otherwise the Mann-Whitney U test is used.

    For every test, H0 is rejected when the p-value is below ``alpha``.

    Args:
        dataframe: Frame with a "Version" column and a numeric "Click" column.
        control_group: "Version" value of the control group.
        test_group: "Version" value of the test group.
        alpha: Significance level applied to all tests.

    Returns:
        A one-row DataFrame (index "Result") with the columns "Test Type",
        "P-value", "Hypothesis Result" and "Comment".
    """
    control_clicks = dataframe.loc[dataframe["Version"] == control_group, "Click"]
    test_clicks = dataframe.loc[dataframe["Version"] == test_group, "Click"]

    # Normality assumption, checked separately for each group.
    pvalue_control_group = shapiro(control_clicks)[1]
    print("For Control Group Normality Assumption P-value = %.4f" % pvalue_control_group)
    pvalue_test_group = shapiro(test_clicks)[1]
    print("For Test Group Normality Assumption P-value = %.4f" % pvalue_test_group)

    # Each p-value is compared with alpha on its own, and normality holds only
    # when it is rejected for neither group (a small p-value rejects it).
    normality_holds = bool(pvalue_control_group >= alpha and pvalue_test_group >= alpha)

    if normality_holds:
        print("Normality assumption H0 hypothesis is not rejected. The assumption of normal distribution is provided.")
        pvalue_levene = levene(control_clicks, test_clicks)[1]
        # A small Levene p-value rejects equal variances.
        variances_equal = bool(pvalue_levene >= alpha)
        if variances_equal:
            print("Assumption of homogeneity of variance H0 hypothesis is not rejected. The assumption of homogeneity of variance is provided.")
        else:
            print("Assumption of homogeneity of variance H0 hypothesis rejected. The assumption of homogeneity of variance is not provided.")
        pvalue = ttest_ind(control_clicks, test_clicks, equal_var=variances_equal)[1]
    else:
        print("Normality assumption H0 hypothesis rejected. The assumption of normal distribution is not provided.")
        pvalue = mannwhitneyu(control_clicks, test_clicks)[1]

    rejected = bool(pvalue < alpha)
    df_result = pd.DataFrame(index=["Result"])
    df_result["Test Type"] = "Parametric" if normality_holds else "Non-Parametric"
    df_result["P-value"] = pvalue
    df_result["Hypothesis Result"] = "Rejected" if rejected else "Not Rejected"
    df_result["Comment"] = (
        "There is a statistically significant difference between the means of the two versions"
        if rejected
        else "There is no statistically significant difference between the means of the two versions"
    )
    return df_result
# Test each alternative version against the control version (INTERACT).
ab_test(dataframe=df, control_group="INTERACT", test_group="CONNECT")
ab_test(dataframe=df, control_group="INTERACT", test_group="LEARN")
ab_test(dataframe=df, control_group="INTERACT", test_group="HELP")
ab_test(dataframe=df, control_group="INTERACT", test_group="SERVICES")
# Test Type P-value Hypothesis Result Comment
# Result Parametric 0.2617 Not Rejected There is no statistically significant difference between the means of the two versions
# Comparison of the means of all versions at once (ANOVA - Analysis of Variance)
# f_oneway is applied on the premise that the normality assumption holds;
# when it does not, the Kruskal-Wallis test should be used instead.
anova_versions = ["INTERACT", "CONNECT", "LEARN", "HELP", "SERVICES"]
click_samples = [
    df.loc[df["Version"] == version_name, "Click"] for version_name in anova_versions
]
pvalue = f_oneway(*click_samples)[1]
print("p-value = %.4f" % pvalue)
# p-value = 0.6150
# Pairwise comparison of the versions (Tukey HSD at a 0.05 family-wise error rate)
from statsmodels.stats.multicomp import MultiComparison

comparison = MultiComparison(data=df["Click"], groups=df["Version"])
tukey = comparison.tukeyhsd(alpha=0.05)
print(tukey.summary())
# Multiple Comparison of Means - Tukey HSD, FWER=0.05
# =========================================================
# group1 group2 meandiff p-adj lower upper reject
# ---------------------------------------------------------
# CONNECT HELP 2.7607 0.9999 -58.0751 63.5966 False
# CONNECT INTERACT 26.464 0.7219 -31.6426 84.5706 False
# CONNECT LEARN -0.7169 1.0 -60.3027 58.8689 False
# CONNECT SERVICES -1.9281 1.0 -63.911 60.0548 False
# HELP INTERACT 23.7033 0.7989 -34.6796 82.0861 False
# HELP LEARN -3.4776 0.9999 -63.3329 56.3776 False
# HELP SERVICES -4.6888 0.9996 -66.9308 57.5531 False
# INTERACT LEARN -27.1809 0.6871 -84.2601 29.8982 False
# INTERACT SERVICES -28.3921 0.6864 -87.9692 31.185 False
# LEARN SERVICES -1.2112 1.0 -62.2319 59.8095 False
# ---------------------------------------------------------