Skip to content

Commit

Permalink
Improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
rsangole committed Dec 8, 2024
1 parent 57d270c commit 8d68f82
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion docs/app.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n ui.sidebar(\n ui.markdown(\n \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n ),\n ui.input_slider(\n id=\"treatment_mean\",\n label=\"Treatment Mean\",\n min=1,\n max=20,\n value=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_mean\",\n label=\"Control Mean\",\n min=0,\n max=20,\n value=10.5,\n step=0.1,\n ),\n ui.input_slider(\n id=\"treatment_cov\",\n label=\"Treatment Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_cov\",\n label=\"Control Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"n_points\",\n label=\"Points per Group\",\n min=10,\n max=300,\n value=100,\n step=10,\n ),\n ui.input_slider(\n id=\"n_permutations\",\n label=\"Number of Permutations\",\n min=100,\n max=10000,\n value=1000,\n step=1000,\n ),\n open=\"always\",\n bg=\"#f8f8f8\",\n ),\n ui.navset_tab(\n ui.nav_panel(\n \"P-Values\",\n ui.column(\n 10,\n ui.row(\n ui.column(\n 6,\n output_widget(\n \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.h5(\"Simulated Data\"),\n ui.output_ui(\"txt_pop_dif\"),\n ui.br(),\n ui.output_ui(\"txt_sample_dif\"),\n ),\n ui.column(\n 6,\n output_widget(\n \"permutation_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.output_data_frame(\"pval_df\"),\n ),\n ),\n ),\n ),\n ui.nav_panel(\n \"Confidence Intervals\",\n ui.em(\"Coming soon!\"),\n )\n ),\n ui.br(),\n ui.HTML(\n \"<div style='text-align: center; color: gray; font-size:0.9em;'> Created using Shiny for Python | <a href = 'http://www.rsangole.com'>Rahul Sangole</a> | Dec '24</div>\"\n ),\n fillable=False,\n title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n @reactive.Calc\n def treatment():\n return norm.rvs(\n input.treatment_mean(),\n input.treatment_cov(),\n input.n_points(),\n random_state=42,\n )\n\n @reactive.Calc\n def control():\n return norm.rvs(\n input.control_mean(), \n input.control_cov(), \n input.n_points(), \n random_state=42\n )\n\n @reactive.Calc\n def sample_mean_diff():\n return np.abs(np.mean(control()) - np.mean(treatment()))\n\n @reactive.Calc\n def permute():\n combined = np.concatenate([treatment(), control()])\n permutation_results = []\n for _ in range(input.n_permutations()):\n combined = np.random.permutation(combined)\n perm_treatment = combined[: len(treatment())]\n perm_control = combined[-len(control()) :]\n permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n return permutation_results\n\n @reactive.Calc\n def count_extreme():\n return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n @reactive.Calc\n def p_value():\n return count_extreme() / input.n_permutations()\n\n @render_widget\n def treatment_control_hist():\n res = pd.DataFrame(\n {\"Treatment\": treatment(), \"Control\": control()},\n index=range(len(treatment())),\n ).melt()\n fig = px.histogram(\n res,\n x=\"value\",\n color=\"variable\",\n marginal=\"rug\",\n nbins=60,\n color_discrete_sequence=[COL_treatment, COL_control],\n # opacity=0.75,\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Treatment, Control Values\",\n legend_title=\"\",\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n )\n return fig\n\n @render_widget\n def permutation_hist():\n res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n fig = px.histogram(\n res,\n x=\"Permutation\",\n color=\"Highlight\",\n marginal=\"rug\",\n color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Difference in Means\",\n legend_title=\"\",\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n showlegend=False,\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=sample_mean_diff(),\n y0=0,\n x1=sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=-sample_mean_diff(),\n y0=0,\n x1=-sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_annotation(\n x=sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n fig.add_annotation(\n x=-sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"-{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n\n return fig\n\n @render.ui\n def txt_pop_dif():\n return ui.HTML(\n f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n )\n\n @render.ui\n def txt_sample_dif():\n return ui.HTML(\n f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n )\n\n # @render.ui\n # def txt_perm():\n # return ui.HTML(\n # f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n # <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n # )\n\n # @render.ui\n # def txt_p_value():\n # return ui.HTML(\n # f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n # )\n\n @render.data_frame\n def pval_df():\n df = pd.DataFrame(\n {\n \"What question are we trying to answer?\": [\n f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or < -{sample_mean_diff():.3f}?\",\n f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n ],\n \"Answers\": [\n f\"{count_extreme()} out of {input.n_permutations()}\",\n f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n ],\n }\n )\n return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]
[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n ui.sidebar(\n ui.markdown(\n \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n ),\n ui.input_slider(\n id=\"treatment_mean\",\n label=\"Treatment Mean\",\n min=1,\n max=20,\n value=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_mean\",\n label=\"Control Mean\",\n min=0,\n max=20,\n value=10.5,\n step=0.1,\n ),\n ui.input_slider(\n id=\"treatment_cov\",\n label=\"Treatment Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_cov\",\n label=\"Control Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"n_points\",\n label=\"Points per Group\",\n min=10,\n max=300,\n value=100,\n step=10,\n ),\n ui.input_slider(\n id=\"n_permutations\",\n label=\"Number of Permutations\",\n min=100,\n max=10000,\n value=1000,\n step=1000,\n ),\n open=\"always\",\n bg=\"#f8f8f8\",\n ),\n ui.navset_tab(\n ui.nav_panel(\n \"P-Values\",\n ui.column(\n 10,\n ui.row(\n ui.column(\n 6,\n output_widget(\n \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.h5(\"Simulated Data\"),\n ui.output_ui(\"txt_pop_dif\"),\n ui.br(),\n ui.output_ui(\"txt_sample_dif\"),\n ),\n ui.column(\n 6,\n output_widget(\n \"permutation_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.output_data_frame(\"pval_df\"),\n ),\n ),\n ),\n ),\n ui.nav_panel(\n \"Confidence Intervals\",\n ui.em(\"Coming soon!\"),\n )\n ),\n ui.br(),\n ui.HTML(\n \"<div style='text-align: center; color: gray; font-size:0.9em;'> Shiny for Python, using ShinyLive | <a href = 'https://rsangole.github.io/shiny-python-statsignif/' target='_blank'>Github Repo</a> | <a href = 'http://www.rsangole.com' target='_blank'>Rahul Sangole</a> | Dec '24</div>\"\n ),\n fillable=False,\n title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n @reactive.Calc\n def treatment():\n return norm.rvs(\n input.treatment_mean(),\n input.treatment_cov(),\n input.n_points(),\n random_state=42,\n )\n\n @reactive.Calc\n def control():\n return norm.rvs(\n input.control_mean(), \n input.control_cov(), \n input.n_points(), \n random_state=42\n )\n\n @reactive.Calc\n def sample_mean_diff():\n return np.abs(np.mean(control()) - np.mean(treatment()))\n\n @reactive.Calc\n def permute():\n combined = np.concatenate([treatment(), control()])\n permutation_results = []\n for _ in range(input.n_permutations()):\n combined = np.random.permutation(combined)\n perm_treatment = combined[: len(treatment())]\n perm_control = combined[-len(control()) :]\n permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n return permutation_results\n\n @reactive.Calc\n def count_extreme():\n return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n @reactive.Calc\n def p_value():\n return count_extreme() / input.n_permutations()\n\n @render_widget\n def treatment_control_hist():\n res = pd.DataFrame(\n {\"Treatment\": treatment(), \"Control\": control()},\n index=range(len(treatment())),\n ).melt()\n fig = px.histogram(\n res,\n x=\"value\",\n color=\"variable\",\n marginal=\"rug\",\n nbins=60,\n color_discrete_sequence=[COL_treatment, COL_control],\n # opacity=0.75,\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Treatment, Control Values\",\n legend_title=\"\",\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n )\n return fig\n\n @render_widget\n def permutation_hist():\n res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n fig = px.histogram(\n res,\n x=\"Permutation\",\n color=\"Highlight\",\n marginal=\"rug\",\n color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Difference in Means\",\n legend_title=\"\",\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n showlegend=False,\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=sample_mean_diff(),\n y0=0,\n x1=sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=-sample_mean_diff(),\n y0=0,\n x1=-sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_annotation(\n x=sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n fig.add_annotation(\n x=-sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"-{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n\n return fig\n\n @render.ui\n def txt_pop_dif():\n return ui.HTML(\n f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n )\n\n @render.ui\n def txt_sample_dif():\n return ui.HTML(\n f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n )\n\n # @render.ui\n # def txt_perm():\n # return ui.HTML(\n # f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n # <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n # )\n\n # @render.ui\n # def txt_p_value():\n # return ui.HTML(\n # f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n # )\n\n @render.data_frame\n def pval_df():\n df = pd.DataFrame(\n {\n \"What question are we trying to answer?\": [\n f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or < -{sample_mean_diff():.3f}?\",\n f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n ],\n \"Answers\": [\n f\"{count_extreme()} out of {input.n_permutations()}\",\n f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n ],\n }\n )\n return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]

0 comments on commit 8d68f82

Please sign in to comment.