From ba35df49a0b81fae8957803891184dadf8c653bb Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Sat, 3 Dec 2022 07:37:04 +0100 Subject: [PATCH 1/5] improve the hist reference guide --- examples/reference/pandas/hist.ipynb | 87 +++++++++++++++++++++++++--- hvplot/plotting/core.py | 17 +++++- 2 files changed, 96 insertions(+), 8 deletions(-) diff --git a/examples/reference/pandas/hist.ipynb b/examples/reference/pandas/hist.ipynb index 1378fd54b..0146b21ad 100644 --- a/examples/reference/pandas/hist.ipynb +++ b/examples/reference/pandas/hist.ipynb @@ -6,14 +6,16 @@ "metadata": {}, "outputs": [], "source": [ - "import hvplot.pandas # noqa" + "import hvplot.pandas # noqa\n", + "\n", + "# hvplot.extension(\"matplotlib\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "`hist` is often a good way to start looking at data to get a sense of the distribution. Similar methods include [`kde`](kde.ipny) (also available as `density`)." + "`hist` is often a good way to start looking at continuous data to get a sense of the distribution. Similar methods include [`kde`](kde.ipynb) (also available as `density`)." 
] }, { @@ -22,9 +24,18 @@ "metadata": {}, "outputs": [], "source": [ - "from bokeh.sampledata.autompg import autompg_clean as df\n", + "from bokeh.sampledata.autompg import autompg_clean\n", "\n", - "df.sample(n=5)" + "autompg_clean.sample(n=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "autompg_clean.hvplot.hist(\"weight\")" ] }, { @@ -40,16 +51,78 @@ "metadata": {}, "outputs": [], "source": [ - "df.hvplot.hist(\"weight\", by=\"origin\", subplots=True, width=250)" + "autompg_clean.hvplot.hist(\"weight\", by=\"origin\", subplots=True, width=250)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also plot histograms of *datetime* data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from bokeh.sampledata.commits import data as commits\n", + "\n", + "commits=commits.reset_index().sort_values(\"datetime\")\n", + "commits.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "commits.hvplot.hist(\n", + " \"datetime\",\n", + " bin_range=(pd.Timestamp('2012-11-30'), pd.Timestamp('2017-05-01')),\n", + " bins=54, \n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to plot the distribution of a categorical column you can calculate the distribution using `value_counts` and plot it using `.hvplot.bar`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "autompg_clean[\"mfr\"].value_counts().hvplot.bar(invert=True, flip_yaxis=True, height=500)" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", - "pygments_lexer": "ipython3" + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/hvplot/plotting/core.py b/hvplot/plotting/core.py index 894e84d83..6a34e66a6 100644 --- a/hvplot/plotting/core.py +++ b/hvplot/plotting/core.py @@ -1244,7 +1244,7 @@ def violin(self, y=None, by=None, **kwds): def hist(self, y=None, by=None, **kwds): """ - A `histogram` displays an approximate representation of the distribution of numerical data. + A `histogram` displays an approximate representation of the distribution of continous data. Reference: https://hvplot.holoviz.org/reference/pandas/hist.html @@ -1252,6 +1252,7 @@ def hist(self, y=None, by=None, **kwds): ---------- y : string or sequence Field(s) in the *wide* data to compute the distribution(s) from. + Please note the fields should contain continuous data. Not categorical. by : string or sequence Field(s) in the *long* data to group by. bins : int, optional @@ -1295,6 +1296,20 @@ def hist(self, y=None, by=None, **kwds): df['two'] = df['one'] + np.random.randint(1, 7, 6000) df.hvplot.hist(bins=12, alpha=0.5, color=["lightgreen", "pink"]) + If you want to show the distribution of the values of a categorical column, + you can use `value_counts` and `bar` as shown below + + .. 
code-block:: + + import hvplot.pandas + import pandas as pd + + data = pd.DataFrame({ + "library": ["bokeh", "plotly", "matplotlib", "bokeh", "matplotlib", "matplotlib"] + }) + + data["library"].value_counts().hvplot.bar(invert=True, flip_yaxis=True) + References ---------- From 00e6151a3ee13d18780050b71c52750eb6b24cee Mon Sep 17 00:00:00 2001 From: Marc Skov Madsen Date: Fri, 16 Dec 2022 23:43:55 +0100 Subject: [PATCH 2/5] Update examples/reference/pandas/hist.ipynb Co-authored-by: Maxime Liquet <35924738+maximlt@users.noreply.github.com> --- examples/reference/pandas/hist.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/reference/pandas/hist.ipynb b/examples/reference/pandas/hist.ipynb index 0146b21ad..6524e5ce6 100644 --- a/examples/reference/pandas/hist.ipynb +++ b/examples/reference/pandas/hist.ipynb @@ -70,7 +70,7 @@ "import pandas as pd\n", "from bokeh.sampledata.commits import data as commits\n", "\n", - "commits=commits.reset_index().sort_values(\"datetime\")\n", + "commits = commits.reset_index().sort_values(\"datetime\")\n", "commits.head(3)" ] }, From d0871bd0c8228b1edb81e2c3dfeee198ea57d638 Mon Sep 17 00:00:00 2001 From: maximlt Date: Thu, 16 Mar 2023 01:18:51 +0100 Subject: [PATCH 3/5] clean up --- examples/reference/pandas/hist.ipynb | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/examples/reference/pandas/hist.ipynb b/examples/reference/pandas/hist.ipynb index 6524e5ce6..54cd4b40c 100644 --- a/examples/reference/pandas/hist.ipynb +++ b/examples/reference/pandas/hist.ipynb @@ -105,22 +105,9 @@ } ], "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.7" + "pygments_lexer": 
"ipython3" } }, "nbformat": 4, From faf7fea134cba0b1f5efd411e5a714b75c9b225a Mon Sep 17 00:00:00 2001 From: Maxime Liquet <35924738+maximlt@users.noreply.github.com> Date: Thu, 16 Mar 2023 01:27:09 +0100 Subject: [PATCH 4/5] Remove extra parameters --- hvplot/plotting/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hvplot/plotting/core.py b/hvplot/plotting/core.py index c1cf17f07..53132e49e 100644 --- a/hvplot/plotting/core.py +++ b/hvplot/plotting/core.py @@ -1308,7 +1308,7 @@ def hist(self, y=None, by=None, **kwds): "library": ["bokeh", "plotly", "matplotlib", "bokeh", "matplotlib", "matplotlib"] }) - data["library"].value_counts().hvplot.bar(invert=True, flip_yaxis=True) + data["library"].value_counts().hvplot.bar() References ---------- From 759e20e163094243fb78b84b07d769372d57fac1 Mon Sep 17 00:00:00 2001 From: maximlt Date: Thu, 16 Mar 2023 01:31:41 +0100 Subject: [PATCH 5/5] indicate that value_counts is a Pandas method --- examples/reference/pandas/hist.ipynb | 3 ++- hvplot/plotting/core.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/reference/pandas/hist.ipynb b/examples/reference/pandas/hist.ipynb index 54cd4b40c..fff4f17f6 100644 --- a/examples/reference/pandas/hist.ipynb +++ b/examples/reference/pandas/hist.ipynb @@ -88,10 +88,11 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "If you want to plot the distribution of a categorical column you can calculate the distribution using `value_counts` and plot it using `.hvplot.bar`." + "If you want to plot the distribution of a categorical column you can calculate the distribution using Pandas' method `value_counts` and plot it using `.hvplot.bar`." 
] }, { diff --git a/hvplot/plotting/core.py b/hvplot/plotting/core.py index 53132e49e..aa77fdeb5 100644 --- a/hvplot/plotting/core.py +++ b/hvplot/plotting/core.py @@ -1297,7 +1297,7 @@ def hist(self, y=None, by=None, **kwds): df.hvplot.hist(bins=12, alpha=0.5, color=["lightgreen", "pink"]) If you want to show the distribution of the values of a categorical column, - you can use `value_counts` and `bar` as shown below + you can use Pandas' method `value_counts` and `bar` as shown below .. code-block::