Improvements

rsangole · rsangole · commit 8d68f8243b63 · 2024-12-07T21:29:37.000-08:00
diff --git a/docs/app.json b/docs/app.json
@@ -1 +1 @@
-[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n    ui.sidebar(\n        ui.markdown(\n            \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n        ),\n        ui.input_slider(\n            id=\"treatment_mean\",\n            label=\"Treatment Mean\",\n            min=1,\n            max=20,\n            value=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"control_mean\",\n            label=\"Control Mean\",\n            min=0,\n            max=20,\n            value=10.5,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"treatment_cov\",\n            label=\"Treatment Std Dev\",\n            value=2,\n            min=0,\n            max=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"control_cov\",\n            label=\"Control Std Dev\",\n            value=2,\n            min=0,\n            max=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"n_points\",\n            label=\"Points per Group\",\n            min=10,\n            max=300,\n            value=100,\n            step=10,\n        ),\n        ui.input_slider(\n            id=\"n_permutations\",\n            label=\"Number of Permutations\",\n            min=100,\n            max=10000,\n            value=1000,\n            step=1000,\n        ),\n        open=\"always\",\n        bg=\"#f8f8f8\",\n    ),\n    ui.navset_tab(\n        ui.nav_panel(\n            \"P-Values\",\n            ui.column(\n                10,\n                ui.row(\n                    ui.column(\n                        6,\n                        output_widget(\n                            \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n                        ),\n                        ui.h5(\"Simulated Data\"),\n                        ui.output_ui(\"txt_pop_dif\"),\n                        ui.br(),\n                        ui.output_ui(\"txt_sample_dif\"),\n                    ),\n                    ui.column(\n                        6,\n                        output_widget(\n                            \"permutation_hist\", height=\"400px\", width=\"400px\"\n                        ),\n                        ui.output_data_frame(\"pval_df\"),\n                    ),\n                ),\n            ),\n        ),\n        ui.nav_panel(\n            \"Confidence Intervals\",\n            ui.em(\"Coming soon!\"),\n        )\n    ),\n    ui.br(),\n    ui.HTML(\n        \"<div style='text-align: center; color: gray; font-size:0.9em;'> Created using Shiny for Python | <a href = 'http://www.rsangole.com'>Rahul Sangole</a> | Dec '24</div>\"\n    ),\n    fillable=False,\n    title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n    theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n    @reactive.Calc\n    def treatment():\n        return norm.rvs(\n            input.treatment_mean(),\n            input.treatment_cov(),\n            input.n_points(),\n            random_state=42,\n        )\n\n    @reactive.Calc\n    def control():\n        return norm.rvs(\n            input.control_mean(), \n            input.control_cov(), \n            input.n_points(), \n            random_state=42\n        )\n\n    @reactive.Calc\n    def sample_mean_diff():\n        return np.abs(np.mean(control()) - np.mean(treatment()))\n\n    @reactive.Calc\n    def permute():\n        combined = np.concatenate([treatment(), control()])\n        permutation_results = []\n        for _ in range(input.n_permutations()):\n            combined = np.random.permutation(combined)\n            perm_treatment = combined[: len(treatment())]\n            perm_control = combined[-len(control()) :]\n            permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n        return permutation_results\n\n    @reactive.Calc\n    def count_extreme():\n        return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n    @reactive.Calc\n    def p_value():\n        return count_extreme() / input.n_permutations()\n\n    @render_widget\n    def treatment_control_hist():\n        res = pd.DataFrame(\n            {\"Treatment\": treatment(), \"Control\": control()},\n            index=range(len(treatment())),\n        ).melt()\n        fig = px.histogram(\n            res,\n            x=\"value\",\n            color=\"variable\",\n            marginal=\"rug\",\n            nbins=60,\n            color_discrete_sequence=[COL_treatment, COL_control],\n            # opacity=0.75,\n        ).update_layout(\n            title={\"text\": \"\", \"x\": 0.5},\n            yaxis_title=\"Count\",\n            xaxis_title=\"Treatment, Control Values\",\n            legend_title=\"\",\n            legend=dict(\n                orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n            ),\n            plot_bgcolor=\"white\",\n            paper_bgcolor=\"white\",\n        )\n        return fig\n\n    @render_widget\n    def permutation_hist():\n        res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n        res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n        fig = px.histogram(\n            res,\n            x=\"Permutation\",\n            color=\"Highlight\",\n            marginal=\"rug\",\n            color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n        ).update_layout(\n            title={\"text\": \"\", \"x\": 0.5},\n            yaxis_title=\"Count\",\n            xaxis_title=\"Difference in Means\",\n            legend_title=\"\",\n            plot_bgcolor=\"white\",\n            paper_bgcolor=\"white\",\n            showlegend=False,\n            legend=dict(\n                orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n            ),\n        )\n        fig.add_shape(\n            type=\"line\",\n            x0=sample_mean_diff(),\n            y0=0,\n            x1=sample_mean_diff(),\n            y1=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            line=dict(\n                width=1,\n                dash=\"dot\",\n            ),\n        )\n        fig.add_shape(\n            type=\"line\",\n            x0=-sample_mean_diff(),\n            y0=0,\n            x1=-sample_mean_diff(),\n            y1=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            line=dict(\n                width=1,\n                dash=\"dot\",\n            ),\n        )\n        fig.add_annotation(\n            x=sample_mean_diff(),\n            y=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            text=f\"{sample_mean_diff():.3f}\",\n            showarrow=False,\n            yshift=1,\n            xanchor=\"left\",\n        )\n        fig.add_annotation(\n            x=-sample_mean_diff(),\n            y=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            text=f\"-{sample_mean_diff():.3f}\",\n            showarrow=False,\n            yshift=1,\n            xanchor=\"left\",\n        )\n\n        return fig\n\n    @render.ui\n    def txt_pop_dif():\n        return ui.HTML(\n            f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n            )\n\n    @render.ui\n    def txt_sample_dif():\n        return ui.HTML(\n            f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n        )\n\n    # @render.ui\n    # def txt_perm():\n    #     return ui.HTML(\n    #         f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n    #             <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n    #     )\n\n    # @render.ui\n    # def txt_p_value():\n    #     return ui.HTML(\n    #         f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n    #     )\n\n    @render.data_frame\n    def pval_df():\n        df = pd.DataFrame(\n            {\n                \"What question are we trying to answer?\": [\n                    f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or  < -{sample_mean_diff():.3f}?\",\n                    f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n                ],\n                \"Answers\": [\n                    f\"{count_extreme()} out of {input.n_permutations()}\",\n                    f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n                ],\n            }\n        )\n        return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]
+[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n    ui.sidebar(\n        ui.markdown(\n            \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n        ),\n        ui.input_slider(\n            id=\"treatment_mean\",\n            label=\"Treatment Mean\",\n            min=1,\n            max=20,\n            value=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"control_mean\",\n            label=\"Control Mean\",\n            min=0,\n            max=20,\n            value=10.5,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"treatment_cov\",\n            label=\"Treatment Std Dev\",\n            value=2,\n            min=0,\n            max=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"control_cov\",\n            label=\"Control Std Dev\",\n            value=2,\n            min=0,\n            max=10,\n            step=0.1,\n        ),\n        ui.input_slider(\n            id=\"n_points\",\n            label=\"Points per Group\",\n            min=10,\n            max=300,\n            value=100,\n            step=10,\n        ),\n        ui.input_slider(\n            id=\"n_permutations\",\n            label=\"Number of Permutations\",\n            min=100,\n            max=10000,\n            value=1000,\n            step=1000,\n        ),\n        open=\"always\",\n        bg=\"#f8f8f8\",\n    ),\n    ui.navset_tab(\n        ui.nav_panel(\n            \"P-Values\",\n            ui.column(\n                10,\n                ui.row(\n                    ui.column(\n                        6,\n                        output_widget(\n                            \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n                        ),\n                        ui.h5(\"Simulated Data\"),\n                        ui.output_ui(\"txt_pop_dif\"),\n                        ui.br(),\n                        ui.output_ui(\"txt_sample_dif\"),\n                    ),\n                    ui.column(\n                        6,\n                        output_widget(\n                            \"permutation_hist\", height=\"400px\", width=\"400px\"\n                        ),\n                        ui.output_data_frame(\"pval_df\"),\n                    ),\n                ),\n            ),\n        ),\n        ui.nav_panel(\n            \"Confidence Intervals\",\n            ui.em(\"Coming soon!\"),\n        )\n    ),\n    ui.br(),\n    ui.HTML(\n        \"<div style='text-align: center; color: gray; font-size:0.9em;'> Shiny for Python, using ShinyLive | <a href = 'https://rsangole.github.io/shiny-python-statsignif/' target='_blank'>Github Repo</a> | <a href = 'http://www.rsangole.com' target='_blank'>Rahul Sangole</a> | Dec '24</div>\"\n    ),\n    fillable=False,\n    title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n    theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n    @reactive.Calc\n    def treatment():\n        return norm.rvs(\n            input.treatment_mean(),\n            input.treatment_cov(),\n            input.n_points(),\n            random_state=42,\n        )\n\n    @reactive.Calc\n    def control():\n        return norm.rvs(\n            input.control_mean(), \n            input.control_cov(), \n            input.n_points(), \n            random_state=42\n        )\n\n    @reactive.Calc\n    def sample_mean_diff():\n        return np.abs(np.mean(control()) - np.mean(treatment()))\n\n    @reactive.Calc\n    def permute():\n        combined = np.concatenate([treatment(), control()])\n        permutation_results = []\n        for _ in range(input.n_permutations()):\n            combined = np.random.permutation(combined)\n            perm_treatment = combined[: len(treatment())]\n            perm_control = combined[-len(control()) :]\n            permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n        return permutation_results\n\n    @reactive.Calc\n    def count_extreme():\n        return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n    @reactive.Calc\n    def p_value():\n        return count_extreme() / input.n_permutations()\n\n    @render_widget\n    def treatment_control_hist():\n        res = pd.DataFrame(\n            {\"Treatment\": treatment(), \"Control\": control()},\n            index=range(len(treatment())),\n        ).melt()\n        fig = px.histogram(\n            res,\n            x=\"value\",\n            color=\"variable\",\n            marginal=\"rug\",\n            nbins=60,\n            color_discrete_sequence=[COL_treatment, COL_control],\n            # opacity=0.75,\n        ).update_layout(\n            title={\"text\": \"\", \"x\": 0.5},\n            yaxis_title=\"Count\",\n            xaxis_title=\"Treatment, Control Values\",\n            legend_title=\"\",\n            legend=dict(\n                orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n            ),\n            plot_bgcolor=\"white\",\n            paper_bgcolor=\"white\",\n        )\n        return fig\n\n    @render_widget\n    def permutation_hist():\n        res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n        res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n        fig = px.histogram(\n            res,\n            x=\"Permutation\",\n            color=\"Highlight\",\n            marginal=\"rug\",\n            color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n        ).update_layout(\n            title={\"text\": \"\", \"x\": 0.5},\n            yaxis_title=\"Count\",\n            xaxis_title=\"Difference in Means\",\n            legend_title=\"\",\n            plot_bgcolor=\"white\",\n            paper_bgcolor=\"white\",\n            showlegend=False,\n            legend=dict(\n                orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n            ),\n        )\n        fig.add_shape(\n            type=\"line\",\n            x0=sample_mean_diff(),\n            y0=0,\n            x1=sample_mean_diff(),\n            y1=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            line=dict(\n                width=1,\n                dash=\"dot\",\n            ),\n        )\n        fig.add_shape(\n            type=\"line\",\n            x0=-sample_mean_diff(),\n            y0=0,\n            x1=-sample_mean_diff(),\n            y1=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            line=dict(\n                width=1,\n                dash=\"dot\",\n            ),\n        )\n        fig.add_annotation(\n            x=sample_mean_diff(),\n            y=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            text=f\"{sample_mean_diff():.3f}\",\n            showarrow=False,\n            yshift=1,\n            xanchor=\"left\",\n        )\n        fig.add_annotation(\n            x=-sample_mean_diff(),\n            y=0.5,\n            xref=\"x\",\n            yref=\"paper\",\n            text=f\"-{sample_mean_diff():.3f}\",\n            showarrow=False,\n            yshift=1,\n            xanchor=\"left\",\n        )\n\n        return fig\n\n    @render.ui\n    def txt_pop_dif():\n        return ui.HTML(\n            f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n            )\n\n    @render.ui\n    def txt_sample_dif():\n        return ui.HTML(\n            f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n        )\n\n    # @render.ui\n    # def txt_perm():\n    #     return ui.HTML(\n    #         f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n    #             <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n    #     )\n\n    # @render.ui\n    # def txt_p_value():\n    #     return ui.HTML(\n    #         f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n    #     )\n\n    @render.data_frame\n    def pval_df():\n        df = pd.DataFrame(\n            {\n                \"What question are we trying to answer?\": [\n                    f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or  < -{sample_mean_diff():.3f}?\",\n                    f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n                ],\n                \"Answers\": [\n                    f\"{count_extreme()} out of {input.n_permutations()}\",\n                    f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n                ],\n            }\n        )\n        return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		-[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n ui.sidebar(\n ui.markdown(\n \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n ),\n ui.input_slider(\n id=\"treatment_mean\",\n label=\"Treatment Mean\",\n min=1,\n max=20,\n value=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_mean\",\n label=\"Control Mean\",\n min=0,\n max=20,\n value=10.5,\n step=0.1,\n ),\n ui.input_slider(\n id=\"treatment_cov\",\n label=\"Treatment Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_cov\",\n label=\"Control Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"n_points\",\n label=\"Points per Group\",\n min=10,\n max=300,\n value=100,\n step=10,\n ),\n ui.input_slider(\n id=\"n_permutations\",\n label=\"Number of Permutations\",\n min=100,\n max=10000,\n value=1000,\n step=1000,\n ),\n open=\"always\",\n bg=\"#f8f8f8\",\n ),\n ui.navset_tab(\n ui.nav_panel(\n \"P-Values\",\n ui.column(\n 10,\n ui.row(\n ui.column(\n 6,\n output_widget(\n \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.h5(\"Simulated Data\"),\n ui.output_ui(\"txt_pop_dif\"),\n ui.br(),\n ui.output_ui(\"txt_sample_dif\"),\n ),\n ui.column(\n 6,\n output_widget(\n \"permutation_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.output_data_frame(\"pval_df\"),\n ),\n ),\n ),\n ),\n ui.nav_panel(\n \"Confidence Intervals\",\n ui.em(\"Coming soon!\"),\n )\n ),\n ui.br(),\n ui.HTML(\n \"<div style='text-align: center; color: gray; font-size:0.9em;'> Created using Shiny for Python \| <a href = 'http://www.rsangole.com'>Rahul Sangole</a> \| Dec '24</div>\"\n ),\n fillable=False,\n title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n @reactive.Calc\n def treatment():\n return norm.rvs(\n input.treatment_mean(),\n input.treatment_cov(),\n input.n_points(),\n random_state=42,\n )\n\n @reactive.Calc\n def control():\n return norm.rvs(\n input.control_mean(), \n input.control_cov(), \n input.n_points(), \n random_state=42\n )\n\n @reactive.Calc\n def sample_mean_diff():\n return np.abs(np.mean(control()) - np.mean(treatment()))\n\n @reactive.Calc\n def permute():\n combined = np.concatenate([treatment(), control()])\n permutation_results = []\n for _ in range(input.n_permutations()):\n combined = np.random.permutation(combined)\n perm_treatment = combined[: len(treatment())]\n perm_control = combined[-len(control()) :]\n permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n return permutation_results\n\n @reactive.Calc\n def count_extreme():\n return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n @reactive.Calc\n def p_value():\n return count_extreme() / input.n_permutations()\n\n @render_widget\n def treatment_control_hist():\n res = pd.DataFrame(\n {\"Treatment\": treatment(), \"Control\": control()},\n index=range(len(treatment())),\n ).melt()\n fig = px.histogram(\n res,\n x=\"value\",\n color=\"variable\",\n marginal=\"rug\",\n nbins=60,\n color_discrete_sequence=[COL_treatment, COL_control],\n # opacity=0.75,\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Treatment, Control Values\",\n legend_title=\"\",\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n )\n return fig\n\n @render_widget\n def permutation_hist():\n res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n fig = px.histogram(\n res,\n x=\"Permutation\",\n color=\"Highlight\",\n marginal=\"rug\",\n color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Difference in Means\",\n legend_title=\"\",\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n showlegend=False,\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=sample_mean_diff(),\n y0=0,\n x1=sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=-sample_mean_diff(),\n y0=0,\n x1=-sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_annotation(\n x=sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n fig.add_annotation(\n x=-sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"-{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n\n return fig\n\n @render.ui\n def txt_pop_dif():\n return ui.HTML(\n f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n )\n\n @render.ui\n def txt_sample_dif():\n return ui.HTML(\n f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n )\n\n # @render.ui\n # def txt_perm():\n # return ui.HTML(\n # f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n # <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n # )\n\n # @render.ui\n # def txt_p_value():\n # return ui.HTML(\n # f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n # )\n\n @render.data_frame\n def pval_df():\n df = pd.DataFrame(\n {\n \"What question are we trying to answer?\": [\n f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or < -{sample_mean_diff():.3f}?\",\n f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n ],\n \"Answers\": [\n f\"{count_extreme()} out of {input.n_permutations()}\",\n f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n ],\n }\n )\n return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]
	`1`	+[{"name": "app.py", "content": "from __future__ import annotations\nfrom scipy.stats import norm\nfrom shiny import App, reactive, render, ui\nfrom shinyswatch.theme import cosmo as shiny_theme\nfrom shinywidgets import output_widget, render_widget\nimport numpy as np\nimport pandas as pd\nimport plotly.express as px\nimport shinyswatch\n\nCOL_TXT = \"#0081a7\"\nCOL_treatment = \"#0081a7\"\nCOL_control = \"#00afb9\"\nCOL_permutation = \"#c0c0c0\"\nCOL_perm_highlight = \"#f07167\"\n\napp_ui = ui.page_sidebar(\n ui.sidebar(\n ui.markdown(\n \"Based on Samuele Mazzanti's [Medium post](https://towardsdatascience.com/why-statistical-significance-is-pointless-a7644be30266), this app makes interactive the two ideas of statistical significance which Samuele explores.\"\n ),\n ui.input_slider(\n id=\"treatment_mean\",\n label=\"Treatment Mean\",\n min=1,\n max=20,\n value=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_mean\",\n label=\"Control Mean\",\n min=0,\n max=20,\n value=10.5,\n step=0.1,\n ),\n ui.input_slider(\n id=\"treatment_cov\",\n label=\"Treatment Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"control_cov\",\n label=\"Control Std Dev\",\n value=2,\n min=0,\n max=10,\n step=0.1,\n ),\n ui.input_slider(\n id=\"n_points\",\n label=\"Points per Group\",\n min=10,\n max=300,\n value=100,\n step=10,\n ),\n ui.input_slider(\n id=\"n_permutations\",\n label=\"Number of Permutations\",\n min=100,\n max=10000,\n value=1000,\n step=1000,\n ),\n open=\"always\",\n bg=\"#f8f8f8\",\n ),\n ui.navset_tab(\n ui.nav_panel(\n \"P-Values\",\n ui.column(\n 10,\n ui.row(\n ui.column(\n 6,\n output_widget(\n \"treatment_control_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.h5(\"Simulated Data\"),\n ui.output_ui(\"txt_pop_dif\"),\n ui.br(),\n ui.output_ui(\"txt_sample_dif\"),\n ),\n ui.column(\n 6,\n output_widget(\n \"permutation_hist\", height=\"400px\", width=\"400px\"\n ),\n ui.output_data_frame(\"pval_df\"),\n ),\n ),\n ),\n ),\n ui.nav_panel(\n \"Confidence Intervals\",\n ui.em(\"Coming soon!\"),\n )\n ),\n ui.br(),\n ui.HTML(\n \"<div style='text-align: center; color: gray; font-size:0.9em;'> Shiny for Python, using ShinyLive \| <a href = 'https://rsangole.github.io/shiny-python-statsignif/' target='_blank'>Github Repo</a> \| <a href = 'http://www.rsangole.com' target='_blank'>Rahul Sangole</a> \| Dec '24</div>\"\n ),\n fillable=False,\n title=\"Why \u201cStatistical Significance\u201d Is Pointless\",\n theme=shiny_theme,\n)\n\n\ndef server(input, output, session):\n @reactive.Calc\n def treatment():\n return norm.rvs(\n input.treatment_mean(),\n input.treatment_cov(),\n input.n_points(),\n random_state=42,\n )\n\n @reactive.Calc\n def control():\n return norm.rvs(\n input.control_mean(), \n input.control_cov(), \n input.n_points(), \n random_state=42\n )\n\n @reactive.Calc\n def sample_mean_diff():\n return np.abs(np.mean(control()) - np.mean(treatment()))\n\n @reactive.Calc\n def permute():\n combined = np.concatenate([treatment(), control()])\n permutation_results = []\n for _ in range(input.n_permutations()):\n combined = np.random.permutation(combined)\n perm_treatment = combined[: len(treatment())]\n perm_control = combined[-len(control()) :]\n permutation_results.append(np.mean(perm_treatment) - np.mean(perm_control))\n return permutation_results\n\n @reactive.Calc\n def count_extreme():\n return np.sum(np.array(np.abs(permute())) >= sample_mean_diff())\n\n @reactive.Calc\n def p_value():\n return count_extreme() / input.n_permutations()\n\n @render_widget\n def treatment_control_hist():\n res = pd.DataFrame(\n {\"Treatment\": treatment(), \"Control\": control()},\n index=range(len(treatment())),\n ).melt()\n fig = px.histogram(\n res,\n x=\"value\",\n color=\"variable\",\n marginal=\"rug\",\n nbins=60,\n color_discrete_sequence=[COL_treatment, COL_control],\n # opacity=0.75,\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Treatment, Control Values\",\n legend_title=\"\",\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n )\n return fig\n\n @render_widget\n def permutation_hist():\n res = pd.DataFrame(permute(), columns=[\"Permutation\"])\n res[\"Highlight\"] = np.abs(res[\"Permutation\"]) >= sample_mean_diff()\n fig = px.histogram(\n res,\n x=\"Permutation\",\n color=\"Highlight\",\n marginal=\"rug\",\n color_discrete_sequence=[COL_permutation, COL_perm_highlight],\n ).update_layout(\n title={\"text\": \"\", \"x\": 0.5},\n yaxis_title=\"Count\",\n xaxis_title=\"Difference in Means\",\n legend_title=\"\",\n plot_bgcolor=\"white\",\n paper_bgcolor=\"white\",\n showlegend=False,\n legend=dict(\n orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"right\", x=1\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=sample_mean_diff(),\n y0=0,\n x1=sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_shape(\n type=\"line\",\n x0=-sample_mean_diff(),\n y0=0,\n x1=-sample_mean_diff(),\n y1=0.5,\n xref=\"x\",\n yref=\"paper\",\n line=dict(\n width=1,\n dash=\"dot\",\n ),\n )\n fig.add_annotation(\n x=sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n fig.add_annotation(\n x=-sample_mean_diff(),\n y=0.5,\n xref=\"x\",\n yref=\"paper\",\n text=f\"-{sample_mean_diff():.3f}\",\n showarrow=False,\n yshift=1,\n xanchor=\"left\",\n )\n\n return fig\n\n @render.ui\n def txt_pop_dif():\n return ui.HTML(\n f\"Diff Population Means: <span style='color:{COL_TXT};'>{input.control_mean()-input.treatment_mean():.3f}</span>\"\n )\n\n @render.ui\n def txt_sample_dif():\n return ui.HTML(\n f\"Diff Sample Means: <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>\"\n )\n\n # @render.ui\n # def txt_perm():\n # return ui.HTML(\n # f\"How likely is it to get a result as extreme as <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>? \\\n # <br>What % of experiments have an outcome > <span style='color:{COL_TXT};'>{sample_mean_diff():.3f}</span>?\"\n # )\n\n # @render.ui\n # def txt_p_value():\n # return ui.HTML(\n # f\"<b>p-value: <span style='color:{COL_TXT};'>{p_value():.3f}</span><b>\"\n # )\n\n @render.data_frame\n def pval_df():\n df = pd.DataFrame(\n {\n \"What question are we trying to answer?\": [\n f\"What proportion of permutations have an outcome > {sample_mean_diff():.3f} or < -{sample_mean_diff():.3f}?\",\n f\"How likely is it to get a result as extreme as {sample_mean_diff():.3f}?\",\n ],\n \"Answers\": [\n f\"{count_extreme()} out of {input.n_permutations()}\",\n f\"{p_value()*100:.2f}%, or a p-value of {p_value():.3f}\",\n ],\n }\n )\n return df\n\n\napp = App(app_ui, server)\n", "type": "text"}]