From fd53e00605a68f2be25ba9527330032f5d1460de Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Mon, 16 May 2022 17:45:49 -0500 Subject: [PATCH 01/18] marks homepage --- doc/user_guide/marks/index.rst | 63 ++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 doc/user_guide/marks/index.rst diff --git a/doc/user_guide/marks/index.rst b/doc/user_guide/marks/index.rst new file mode 100644 index 000000000..670e76304 --- /dev/null +++ b/doc/user_guide/marks/index.rst @@ -0,0 +1,63 @@ +.. currentmodule:: altair + +.. _user-guide-marks: + +Marks +~~~~~ + +We saw in :ref:`user-guide-encoding` that the :meth:`~Chart.encode` method is +used to map columns to visual attributes of the plot. +The ``mark`` property is what specifies how exactly those attributes +should be represented on the plot. + +Altair provides a number of basic mark properties +(the mark properties column links to the Vega-Lite documentation +that allows you to interactively explore the effects of modifying each property): + +========================================= ========================================= ================================================================================ +Mark Method Description +========================================= ========================================= ================================================================================ +:ref:`user-guide-arc-marks` :meth:`~Chart.mark_arc` A pie chart. +:ref:`user-guide-area-marks` :meth:`~Chart.mark_area` A filled area plot. +bar :meth:`~Chart.mark_bar` A bar plot. +circle :meth:`~Chart.mark_circle` A scatter plot with filled circles. +geoshape :meth:`~Chart.mark_geoshape` Visualization containing spatial data +image :meth:`~Chart.mark_image` A scatter plot with image markers. +line :meth:`~Chart.mark_line` A line plot. +point :meth:`~Chart.mark_point` A scatter plot with configurable point shapes. 
+rect :meth:`~Chart.mark_rect` A filled rectangle, used for heatmaps +rule :meth:`~Chart.mark_rule` A vertical or horizontal line spanning the axis. +square :meth:`~Chart.mark_square` A scatter plot with filled squares. +text :meth:`~Chart.mark_text` A scatter plot with points represented by text. +tick :meth:`~Chart.mark_tick` A vertical or horizontal tick mark. +trail :meth:`~Chart.mark_trail` A line with variable widths. +========================================= ========================================= ================================================================================ + +In addition, Altair provides the following compound marks: + +========== ============================== ================================ ================================== +Mark Name Method Description Example +========== ============================== ================================ ================================== +box plot :meth:`~Chart.mark_boxplot` A box plot. :ref:`gallery_boxplot` +error band :meth:`~Chart.mark_errorband` A continuous band around a line. :ref:`gallery_line_with_ci` +error bar :meth:`~Chart.mark_errorbar` An errorbar around a point. :ref:`gallery_errorbars_with_ci` +========== ============================== ================================ ================================== + +In Altair, marks can be most conveniently specified by the ``mark_*`` methods +of the Chart object, which take optional keyword arguments that are passed to +:class:`MarkDef` to configure the look of the marks. + +Mark Properties +_______________ + +As seen in the last two examples, additional arguments to ``mark_*()`` methods are passed along to an +associated :class:`MarkDef` instance, which supports the following attributes: + +.. altair-object-table:: altair.MarkDef + +.. 
toctree:: + :hidden: + + arc + area + From 36c8b60f76cb82c56d0d79e537d5b43d2156ca42 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Mon, 16 May 2022 19:27:23 -0500 Subject: [PATCH 02/18] arc page --- doc/user_guide/marks/arc.rst | 71 ++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 doc/user_guide/marks/arc.rst diff --git a/doc/user_guide/marks/arc.rst b/doc/user_guide/marks/arc.rst new file mode 100644 index 000000000..51a88b7b5 --- /dev/null +++ b/doc/user_guide/marks/arc.rst @@ -0,0 +1,71 @@ +.. currentmodule:: altair + +.. _user-guide-arc-marks: + +Arc +~~~ + +Arc marks are circular arcs defined by a center point plus angular and radial extents. +Arc marks are typically used for radial plots such as pie and donut charts. + +Examples +-------- + +We can create a pie chart by encoding ``theta`` or ``color`` arc marks. + +.. altair-plot:: + import pandas as pd + import altair as alt + + source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) + + alt.Chart(source).mark_arc().encode( + theta=alt.Theta( + field="value", + type="quantitative"), + color=alt.Color( + field="category", + type="nominal"), + ) + +Setting ``innerRadius`` to non-zero values will create a donut chart. + +.. altair-plot:: + import pandas as pd + import altair as alt + + source = pd.DataFrame({"category": [1, 2, 3, 4, 5, 6], "value": [4, 6, 10, 3, 7, 8]}) + + alt.Chart(source).mark_arc(innerRadius=50).encode( + theta=alt.Theta( + field="value", + type="quantitative"), + color=alt.Color( + field="category", + type="nominal"), + ) + +You can also add a text layer to add labels to a pie chart. + +.. 
altair-plot:: + import pandas as pd + import altair as alt + + source = pd.DataFrame( + {"category": ["a", "b", "c", "d", "e", "f"], "value": [4, 6, 10, 3, 7, 8]} + ) + + base = alt.Chart(source).encode( + theta=alt.Theta("value:Q", stack=True), color=alt.Color("category:N", legend=None) + ) + + pie = base.mark_arc(outerRadius=120) + text = base.mark_text(radius=140, size=20).encode(text="category:N") + + pie + text + +Arc Config +^^^^^^^^^^ +The ``arc`` property of the top-level ``config`` object sets the default properties for all arc marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The arc config can contain any arc mark properties (except ``type``, ``style``, and ``clip``). From 668fe0aeb5bb77aa61acc297189580f770cc1943 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Mon, 16 May 2022 19:28:19 -0500 Subject: [PATCH 03/18] area page --- doc/user_guide/marks/area.rst | 165 ++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 doc/user_guide/marks/area.rst diff --git a/doc/user_guide/marks/area.rst b/doc/user_guide/marks/area.rst new file mode 100644 index 000000000..119ba53ef --- /dev/null +++ b/doc/user_guide/marks/area.rst @@ -0,0 +1,165 @@ +.. currentmodule:: altair + +.. _user-guide-area-marks: + +Area +~~~~~~~~~~ +``area`` marks represent multiple data elements as a single area shape. +Area marks are often used to show change over time, using either a single area or stacked areas. + +Examples +-------- + +Area Chart +^^^^^^^^^^ +Using the ``area`` mark with one temporal or ordinal field (typically on ``x``) and +one quantitative field (typically on ``y``) produces an area chart. For example, +the following area chart shows the number of unemployed people in the US over time. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.unemployment_across_industries.url + + alt.Chart(source).mark_area().encode( + x = 'yearmonth(date):T', + y = 'sum(count):Q', + ).properties( + width=300, + height=200 + ) + +Area Chart with Overlaying Lines and Point Markers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +By setting the ``line`` and ``point`` properties of the mark definition +to ``True`` or an object defining a property of the overlaying point marks, we can overlay line and point markers on top of the area. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from altair.expr import datum + + source = data.stocks.url + + alt.Chart(source).mark_area(line = True, point = True).encode( + x = 'date:T', + y = 'price:Q', + ).transform_filter( + datum.symbol == 'GOOG' + ) + +Instead of using a single color as the fill color of the area, we can set it to a gradient. +In this example, we are also customizing the overlay. For more information about gradient options see the Vega-Lite Gradient documentation. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).transform_filter( + 'datum.symbol==="GOOG"' + ).mark_area( + line={'color':'darkgreen'}, + color=alt.Gradient( + gradient='linear', + stops=[alt.GradientStop(color='white', offset=0), + alt.GradientStop(color='darkgreen', offset=1)], + x1=1, + x2=1, + y1=1, + y2=0 + ) + ).encode( + alt.X('date:T'), + alt.Y('price:Q') + ) + +Stacked Area Chart +^^^^^^^^^^^^^^^^^^ +Adding a color field to an area chart creates a stacked area chart by default. For example, here we split the area chart by industry. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.unemployment_across_industries.url + + alt.Chart(source).mark_area().encode( + alt.X('yearmonth(date):T', + axis=alt.Axis(format='%Y', domain=False, tickSize=0) + ), + alt.Y('sum(count):Q'), + alt.Color('series:N', + scale=alt.Scale(scheme='category20b') + )) + +Normalized Stacked Area Chart +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can also create a normalized stacked area chart by setting ``"stack"`` to ``"normalize"`` in the encoding channel. Here we can easily see the percentage of unemployment across industries. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.unemployment_across_industries.url + + alt.Chart(source).mark_area().encode( + alt.X('yearmonth(date):T', + axis=alt.Axis(format='%Y', domain=False, tickSize=0) + ), + alt.Y('sum(count):Q', stack = 'normalize'), + alt.Color('series:N', + scale=alt.Scale(scheme='category20b') + )) + +Streamgraph +^^^^^^^^^^^ + +We can also shift the stacked area chart’s baseline to the center and produce a streamgraph by setting ``"stack"`` to ``"center"`` in the encoding channel. +Adding the ``interactive`` method allows for changing the scales. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.unemployment_across_industries.url + + alt.Chart(source).mark_area().encode( + alt.X('yearmonth(date):T', + axis=alt.Axis(format='%Y', domain=False, tickSize=0) + ), + alt.Y('sum(count):Q', stack='center', axis=None), + alt.Color('series:N', + scale=alt.Scale(scheme='category20b') + ) + ).interactive() + +Ranged Area +^^^^^^^^^^^ +Specifying ``x2`` or ``y2`` for the quantitative axis of area marks produces ranged areas. For example, we can use a ranged area with the ``ci0`` and ``ci1`` +aggregation operators to highlight the 95% confidence interval of a line chart that shows mean values over time. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.seattle_weather() + + alt.Chart(source).mark_area(opacity=0.7).encode( + alt.X('monthdate(date):T', title = 'Date'), + alt.Y('mean(temp_max):Q', title = "Daily Temperature Range (C)"), + alt.Y2('mean(temp_min):Q', ) + ).properties( + width=600, + height=300 + ) + +Area Config +^^^^^^^^^^^ +The ``area`` property of the top-level ``config`` object sets the default properties for all area marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The area config can contain any area mark properties (except ``type``, ``style``, ``clip``, and ``orient``). From dde38d4cd5b5469a95fac05f1488b9705326401b Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Mon, 16 May 2022 19:30:59 -0500 Subject: [PATCH 04/18] add marks index --- doc/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/index.rst b/doc/index.rst index ec0a18b1e..9ee7143f5 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -37,7 +37,7 @@ beautiful and effective visualizations with a minimal amount of code. user_guide/data user_guide/encoding - user_guide/marks + user_guide/marks/index user_guide/transform/index user_guide/interactions user_guide/interactions2 From 9a7a4f4e547e5498eec7f9ce82e1c861455dac87 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:20:00 -0500 Subject: [PATCH 05/18] bar subpage --- doc/user_guide/marks/bar.rst | 121 +++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 doc/user_guide/marks/bar.rst diff --git a/doc/user_guide/marks/bar.rst b/doc/user_guide/marks/bar.rst new file mode 100644 index 000000000..a2c7bd8e0 --- /dev/null +++ b/doc/user_guide/marks/bar.rst @@ -0,0 +1,121 @@ +.. currentmodule:: altair + +.. _user-guide-bar-marks: + +Bar +~~~ + +Bar marks are useful in many visualizations, including bar charts, stacked bar charts, and timelines. 
+ +Examples +-------- + +Single Bar Chart +^^^^^^^^^^^^^^^^ +Mapping a quantitative field to either ``x`` or ``y`` of the ``bar`` mark produces a single bar chart. + +.. altair-plot:: + import altair as alt + from altair import datum + from vega_datasets import data + + source = data.population.url + + alt.Chart(source).mark_bar().encode( + alt.X('sum(people):Q', title = "Population") + ).transform_filter( + datum.year == 2000 + ) + +Bar Chart +^^^^^^^^^ +If we map a different discrete field to the ``y`` channel, we can produce a horizontal bar chart. Specifying ``alt.Step(100)`` will adjust the bar’s height per discrete step. + +.. altair-plot:: + import altair as alt + from altair import datum + from vega_datasets import data + + source = data.population.url + + alt.Chart(source).mark_bar().encode( + alt.X('sum(people):Q', title = "Population"), + alt.Y('age:O') + ).transform_filter( + datum.year == 2000 + ).properties(height = 500) + +Bar Chart with a Temporal Axis +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +While the ``bar`` mark typically uses the x and y channels to encode a pair of discrete and continuous fields, it can also be used with continuous fields on both channels. For example, given a bar chart with a temporal field on x, we can see that the x-scale is a continuous scale. By default, the size of bars on continuous scales will be set based on the ``continuousBandSize`` config. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.seattle_weather() + + alt.Chart(source).mark_bar().encode( + alt.X('month(date):T', title = 'Date'), + alt.Y('mean(precipitation):Q'), + ) + +Histograms +^^^^^^^^^^ + +If the data is not pre-aggregated (i.e. each record in the data field represents one item), mapping a binned quantitative field to ``x`` and aggregate ``count`` to ``y`` produces a histogram. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.movies.url + + alt.Chart(source).mark_bar().encode( + alt.X("IMDB_Rating:Q", bin=True), + y='count()', + ) + +Stacked Bar Chart +^^^^^^^^^^^^^^^^^ +Adding color to the bar chart (by using the ``color`` attribute) creates a stacked bar chart by default. Here we also customize the color’s scale range to make the color a little nicer. (See ``stack`` for more details about customizing stack.) + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + alt.Chart(source).mark_bar().encode( + x='variety', + y='sum(yield)', + color='site' + ) + +Grouped Bar Chart with Offset +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'category': ['A', 'A', 'B','B', "C", "C"], + 'group': ['x', 'y', 'z', 'x', 'y', 'z'], + 'value': [0.1, 0.6, 0.9, 0.7, 0.2, 0.6] + }) + + alt.Chart(source).mark_bar().encode( + x = alt.X('category:N'), + xOffset = 'group:N', + y = alt.Y('value:Q'), + color = alt.Color('group:N') + ) + +Bar Config +^^^^^^^^^^ + +The ``bar`` property of the top-level ``config`` object sets the default properties for all bar marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +Besides standard mark properties, bar config can contain the following additional properties: +binSpacing, continuousBandSize, and discreteBandSize. 
\ No newline at end of file From 10baa1540a3eb897ab263c21f8e782193fb6b2c3 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:20:47 -0500 Subject: [PATCH 06/18] boxplot subpage --- doc/user_guide/marks/boxplot.rst | 124 +++++++++++++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 doc/user_guide/marks/boxplot.rst diff --git a/doc/user_guide/marks/boxplot.rst b/doc/user_guide/marks/boxplot.rst new file mode 100644 index 000000000..37f3bdb48 --- /dev/null +++ b/doc/user_guide/marks/boxplot.rst @@ -0,0 +1,124 @@ +.. currentmodule:: altair + +.. _user-guide-boxplot-marks: + +Box Plot +~~~~~~~~~ + +A box plot summarizes a distribution of quantitative values using a set of summary statistics. The median tick in the box represents the median. The lower and upper parts of the box represent the first and third quartile respectively. Depending on the type of box plot, the ends of the whiskers can represent multiple things. + +To create a box plot, set ``mark`` to ``"boxplot"``. + +Types of Box Plot +^^^^^^^^^^^^^^^^^ +Altair supports two types of box plots, defined by the ``extent`` property in the mark definition object. + +1. Tukey Box Plot is the default box plot in Altair. For a Tukey box plot, the whisker spans from the smallest data to the largest data within the range [Q1 - k * IQR, Q3 + k * IQR] where Q1 and Q3 are the first and third quartiles while IQR is the interquartile range (Q3-Q1). In this type of box plot, you can specify the constant k by setting the ``extent``. If there are outlier points beyond the whisker, they will be displayed using point marks. +By default, the extent is ``1.5``. + +.. altair-plot:: + # Requires the palmerpenguins package: pip install palmerpenguins + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot().encode( + alt.X('body_mass_g:Q', scale=alt.Scale(zero=False)) + ) + +2. 
``min-max`` Box Plot is a box plot where the lower and upper whiskers are defined as the min and max respectively. No points will be considered as outliers for this type of box plot. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot(extent = 'min-max').encode( + alt.X('body_mass_g:Q', scale=alt.Scale(zero=False)), + alt.Y('species:N') + ) + +Dimension and Orientation +^^^^^^^^^^^^^^^^^^^^^^^^^ +Altair supports both 1D and 2D box plots: + +A 1D box plot shows the distribution of a continuous field. +A boxplot’s orientation is automatically determined by the continuous field axis. For example, you can create a vertical 1D box plot by encoding a continuous field on the y axis. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot().encode( + alt.Y('body_mass_g:Q', scale=alt.Scale(zero=False)) + ) + +A 2D box plot shows the distribution of a continuous field, broken down by categories. + +For 2D box plots with one continuous field and one discrete field, the box plot will be horizontal if the continuous field is on the x axis. + +Color, Size, and Opacity Encoding Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can customize the color, size, and opacity of the box in the ``boxplot`` by using the ``color``, ``size``, and ``opacity`` encoding channels. The ``size`` is applied to only the box and median tick. The ``color`` is applied to only the box and the outlier points. Meanwhile, the ``opacity`` is applied to the whole ``boxplot``. + +An example of a ``boxplot`` where the ``color`` encoding channel is specified. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot().encode( + alt.Y('body_mass_g:Q', scale=alt.Scale(zero=False)), + alt.X('species:N'), + color = 'species:N' + ) + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot().encode( + alt.Y('body_mass_g:Q', scale=alt.Scale(zero=False)), + alt.X('species:N'), + alt.Color(value = 'teal') + ) + +Tooltip Encoding Channels +^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can add custom tooltips to box plots. The custom tooltip will override the default boxplot’s tooltips. + +If the field in the tooltip encoding is unaggregated, it replaces the tooltips of the outlier marks. On the other hand, if the field in the tooltip encoding is aggregated, it replaces the tooltips of the box and whisker marks. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_boxplot().encode( + alt.X('body_mass_g:Q', scale=alt.Scale(zero=False)), + alt.Y('species:N'), + tooltip = 'mean(body_mass_g):Q' + ) + +Mark Config +^^^^^^^^^^^ +The ``boxplot`` config object sets the default properties for ``boxplot`` marks. + +The boxplot config can contain all boxplot mark properties, including ``box``, ``median``, ``rule``, ``outliers``, and ``ticks``. 
\ No newline at end of file From b33e39719c7f90e6d6c8dfadf408b97c231be765 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:21:30 -0500 Subject: [PATCH 07/18] circle subpage --- doc/user_guide/marks/circle.rst | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 doc/user_guide/marks/circle.rst diff --git a/doc/user_guide/marks/circle.rst b/doc/user_guide/marks/circle.rst new file mode 100644 index 000000000..4f9fe4a15 --- /dev/null +++ b/doc/user_guide/marks/circle.rst @@ -0,0 +1,31 @@ +.. currentmodule:: altair + +.. _user-guide-circle-marks: + +Circle +~~~~~~ + +``circle`` mark is similar to ``point`` mark, except that (1) the ``shape`` value is always set to ``circle`` (2) they are filled by default. + +Scatterplot with Circle +^^^^^^^^^^^^^^^^^^^^^^^ + +Here is an example scatter plot with ``circle`` marks: + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars.url + + alt.Chart(source).mark_circle().encode( + x = ('Horsepower:Q'), + y = ('Miles_per_Gallon:Q') + ) + +Circle Config +^^^^^^^^^^^^^ + +The ``circle`` property of the top-level ``config`` object sets the default properties for all circle marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The circle config can contain any circle mark properties (except ``type``, ``style``, and ``clip``). 
\ No newline at end of file From 70798ae5b4b1f56d55eba81d1c4dbcf734230f92 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:22:03 -0500 Subject: [PATCH 08/18] errorband subpage --- doc/user_guide/marks/errorband.rst | 148 +++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 doc/user_guide/marks/errorband.rst diff --git a/doc/user_guide/marks/errorband.rst b/doc/user_guide/marks/errorband.rst new file mode 100644 index 000000000..e9e7ec3f4 --- /dev/null +++ b/doc/user_guide/marks/errorband.rst @@ -0,0 +1,148 @@ +.. currentmodule:: altair + +.. _user-guide-errorband-marks: + +Error Band +~~~~~~~~~~ + +An error band summarizes an error range of quantitative values using a set of summary statistics, represented by an area. Error bands in Altair can either be used to aggregate raw data or directly visualize aggregated data. + +To create an error band, set ``mark`` to ``"errorband"``. + +Comparing the usage of Error Band to the usage of Error Bar +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +All the properties and usage of error band are identical to error bar’s, except the ``band`` and ``borders`` that replace the error bar’s ``rule`` and ``ticks``. + +**Error Band** + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars.url + + alt.Chart(source).mark_errorband(extent = 'ci', borders = True).encode( + x = 'year(Year)', + y = alt.Y('Miles_per_Gallon:Q', scale=alt.Scale(zero=False), title = 'Miles per Gallon (95% CIs)') + ) + +**Error Bar** + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars.url + + alt.Chart(source).mark_errorbar(extent = 'ci', ticks = True).encode( + x = 'year(Year)', + y = alt.Y('Miles_per_Gallon:Q', scale=alt.Scale(zero=False), title = 'Miles per Gallon (95% CIs)') + ) + +Using Error Band to Aggregate Raw Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the data is not aggregated yet, Altair will aggregate the data based on the ``extent`` properties in the mark definition as done in the error band showing confidence interval above. All other ``extent`` values are defined in Error Bar. + +Using Error Band to Visualize Aggregated Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +1. Data is aggregated with low and high values of the error band +If the data is already pre-aggregated with low and high values of the error band, you can directly specify ``x`` and ``x2`` (or ``y`` and ``y2``) to use error band as a ranged mark. + +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'ci1': [23.5007, 25.8214, 26.4472, 27.7074], + 'ci0': [19.6912, 20.8554, 21.9749, 22.6203], + 'center': [21.5735, 23.3750, 24.0611, 25.0931], + 'Year': [189302400000, 220924800000, 252460800000, 283996800000] + }) + + band = alt.Chart(source).mark_errorband().encode( + alt.Y('ci1:Q', scale=alt.Scale(zero=False), title = 'Mean of Miles per Gallon (95% CIs)'), + alt.Y2('ci0:Q'), + alt.X('year(Year)') + ) + + line = alt.Chart(source).mark_line().encode( + alt.Y('center:Q'), + alt.X('year(Year)') + ) + + band + line + +2. Data is aggregated with center and error value(s) +If the data is already pre-aggregated with center and error values of the error band, you can use ``x/y``, ``x/yError``, and ``x/yError2`` as defined in Error Bar. 
+ +Dimension +^^^^^^^^^ + +Altair supports both 1D and 2D error bands: + +A **1D error band** shows the error range of a continuous field; it can be used to show the global error range of the whole plot. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars.url + + band = alt.Chart(source).mark_errorband(extent = 'stdev').encode( + alt.Y('Miles_per_Gallon:Q', title = "Miles per Gallon") + ) + + points = alt.Chart(source).mark_point().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q' + ) + + band + points + +A **2D error band** shows the error range of a continuous field for each dimension value such as year. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + line = alt.Chart(source).mark_line().encode( + x='Year', + y='mean(Miles_per_Gallon)' + ) + + band = alt.Chart(source).mark_errorband(extent='ci').encode( + x='Year', + y=alt.Y('Miles_per_Gallon', title='Miles/Gallon'), + ) + + band + line + +Color and Opacity Encoding Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can customize the color and opacity of the band in the ``errorband`` by using the ``color`` and ``opacity`` encoding channels, which are applied to the whole ``errorband``. + +Here is an example of an ``errorband`` with the ``color`` encoding channel set to ``alt.value('black')``. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars.url + + alt.Chart(source).mark_errorband(extent = 'ci', borders = True).encode( + x = 'year(Year)', + y = alt.Y('Miles_per_Gallon:Q', scale=alt.Scale(zero=False), title = 'Miles per Gallon (95% CIs)'), + color = alt.value('black') + ) + +Mark Config +^^^^^^^^^^^ + +The ``errorband`` config object sets the default properties for ``errorband`` marks. + +The error band config can contain all error band mark properties, but currently does not support ``color``, ``opacity``, and ``orient``. 
\ No newline at end of file From b6fe6bb8e86c2c6fe1a9fae8adcfbf6b0b962654 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:22:35 -0500 Subject: [PATCH 09/18] errorbar subpage --- doc/user_guide/marks/errorbar.rst | 223 ++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 doc/user_guide/marks/errorbar.rst diff --git a/doc/user_guide/marks/errorbar.rst b/doc/user_guide/marks/errorbar.rst new file mode 100644 index 000000000..f0b306008 --- /dev/null +++ b/doc/user_guide/marks/errorbar.rst @@ -0,0 +1,223 @@ +.. currentmodule:: altair + +.. _user-guide-errorbar-marks: + +Error Bar +~~~~~~~~~~ + +An error bar summarizes an error range of quantitative values using a set of summary statistics, represented by rules (and optional end ticks). Error bars in Vega-Lite can either be used to aggregate raw data or directly visualize aggregated data. + +To create an error bar, set ``mark`` to ``"errorbar"``. + +Using Error Bars to Aggregate Raw Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If the data is not aggregated yet, Altair will aggregate the data based on the ``extent`` properties in the mark definition. + +1. **Error bars showing standard error** is the default error bar in Vega-Lite. It can also be explicitly specified by setting ``extent`` to ``"stderr"``. The length of the lower and upper rules represents the standard error. By default, the rule marks expand from the mean. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar().encode( + x=alt.X('yield:Q', scale=alt.Scale(zero=False)), + y=alt.Y('variety:N') + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + x=alt.X('yield:Q', aggregate='mean'), + y=alt.Y('variety:N'), + ) + + error_bars + points + +2. **Error bar showing standard deviation** can be specified by setting ``extent`` to ``"stdev"``. 
For this type of error bar, the length of lower and upper rules represent standard deviation. Like an error bar that shows Standard Error, the rule marks expand from the mean by default. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar(extent = 'stdev').encode( + x=alt.X('yield:Q', scale=alt.Scale(zero=False)), + y=alt.Y('variety:N') + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + x=alt.X('yield:Q', aggregate='mean'), + y=alt.Y('variety:N'), + ) + + error_bars + points + +3. **Error bars showing interquartile range** can be specified by setting ``extent`` to ``"iqr"``. For this type of error bar, the rule marks expand from the first quartile to the third quartile. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar(extent = 'iqr').encode( + x=alt.X('yield:Q', scale=alt.Scale(zero=False)), + y=alt.Y('variety:N') + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + x=alt.X('yield:Q', aggregate='mean'), + y=alt.Y('variety:N'), + ) + + error_bars + points + +Using Error Bars to Visualize Aggregated Data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +1. Data is aggregated with low and high values of the error bars +If the data is already pre-aggregated with low and high values of the error bars, you can directly specify ``x`` and ``x2`` (or ``y`` and ``y2``) to use error bar as a ranged mark. + +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'lower_yield': [23.1311, 23.9503, 24.7778, 21.7823], + 'upper_yield': [43.5522, 38.9775, 46.9167, 48.9732], + 'center': [32.4, 30.96667, 33.966665, 30.45], + 'variety': ["Glabron", "Manchuria", "No. 457", "No. 
462"] + }) + + bar = alt.Chart(source).mark_errorbar().encode( + alt.X('upper_yield:Q', scale=alt.Scale(zero=False), title = 'yield'), + alt.X2('lower_yield:Q'), + alt.Y('variety:N') + ) + + point = alt.Chart(source).mark_point(filled = True, color = 'black').encode( + alt.X('center:Q'), + alt.Y('variety:N') + ) + + point + bar + +2. Data is aggregated with center and error value(s) +If the data is already pre-aggregated with center and error values of the error bars, you can directly specify ``x`` as center, ``xError`` and ``xError2`` as error values extended from center (or ``y``, ``yError``, and ``yError2``). If ``x/yError2`` is omitted, error bars have symmetric error values. + +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'yield_error': [7.5522, 6.9775, 3.9167, 11.9732], + 'yield_center': [32.4, 30.96667, 33.966665, 30.45], + 'variety': ["Glabron", "Manchuria", "No. 457", "No. 462"] + }) + + bar = alt.Chart(source).mark_errorbar().encode( + x = alt.X('yield_center:Q', scale=alt.Scale(zero=False), title = 'yield'), + xError = ('yield_error:Q'), + y = alt.Y('variety:N') + ) + + point = alt.Chart(source).mark_point(filled = True, color = 'black').encode( + alt.X('yield_center:Q'), + alt.Y('variety:N') + ) + + point + bar + +**Note** if error is pre-aggregated with asymmetric error values, one of ``x/yError`` and ``x/yError2`` has to be a positive value and the other has to be a negative value. + +Dimension & Orientation +^^^^^^^^^^^^^^^^^^^^^^^ +Altair supports both 1D and 2D error bars: + +A **1D error bar** shows the error range of a continuous field. + +The orientation of an error bar is automatically determined by the continuous field axis. For example, you can create a vertical 1D error bar by encoding a continuous field on the y axis. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar().encode( + alt.Y('yield:Q', scale=alt.Scale(zero=False)) + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + alt.Y('yield:Q', aggregate='mean') + ) + + error_bars + points + +A **2D error bar** shows the error range of a continuous field, broken down by categories. + +For 2D error bars with one continuous field and one discrete field, the error bars will be horizontal if the continuous field is on the x axis. Alternatively, if the continuous field is on the y axis, the error bar will be vertical. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar(extent = 'stdev').encode( + alt.Y('yield:Q', scale=alt.Scale(zero=False)), + alt.X('variety:N') + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + alt.Y('yield:Q', aggregate='mean'), + alt.X('variety:N'), + ) + + error_bars + points + +Color and Opacity Encoding Channels +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +You can customize the color and opacity of the bar in the ``errorbar`` by using the ``color`` and ``opacity`` encoding channels, which are applied to the whole errorbar. + +Here is an example of an ``errorbar`` with the ``color`` encoding channel set to ``alt.value("#4682b4")``. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + error_bars = alt.Chart(source).mark_errorbar(ticks = True).encode( + alt.X('yield:Q', scale=alt.Scale(zero=False)), + alt.Y('variety:N'), + color = alt.value("#4682b4") + ) + + points = alt.Chart(source).mark_point(filled=True, color='black').encode( + alt.X('yield:Q', aggregate='mean'), + alt.Y('variety:N'), + ) + + error_bars + points + +Tooltip Encoding Channels +^^^^^^^^^^^^^^^^^^^^^^^^^ +You can add custom tooltips to error bars. The custom tooltip will override the default error bar’s tooltips. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.barley() + + alt.Chart(source).mark_errorbar().encode( + alt.X('yield:Q', scale=alt.Scale(zero=False)), + alt.Y('variety:N'), + tooltip = 'variety:N' + ) + +Mark Config +^^^^^^^^^^^ +The ``errorbar`` config object sets the default properties for ``errorbar`` marks. + From 2e82e4bb0248c872d75752438de543ee10bbb475 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:23:25 -0500 Subject: [PATCH 10/18] image subpage --- doc/user_guide/marks/image.rst | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 doc/user_guide/marks/image.rst diff --git a/doc/user_guide/marks/image.rst b/doc/user_guide/marks/image.rst new file mode 100644 index 000000000..510c3f6dc --- /dev/null +++ b/doc/user_guide/marks/image.rst @@ -0,0 +1,36 @@ +.. currentmodule:: altair + +.. _user-guide-image-marks: + +Image +~~~~~~ + +Image marks allow external images, such as icons or photographs, to be included in Altair visualizations. Image files such as PNG or JPG images are loaded from provided URLs. + +Scatterplot with Image Marks +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. 
altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame.from_records([ + {"x": 0.5, "y": 0.5, "img": "https://vega.github.io/vega-datasets/data/ffox.png"}, + {"x": 1.5, "y": 1.5, "img": "https://vega.github.io/vega-datasets/data/gimp.png"}, + {"x": 2.5, "y": 2.5, "img": "https://vega.github.io/vega-datasets/data/7zip.png"} + ]) + + alt.Chart(source).mark_image( + width=50, + height=50 + ).encode( + x='x', + y='y', + url='img' + ) + +Image Config +^^^^^^^^^^^^ +The ``image`` property of the top-level ``config`` object sets the default properties for all image marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The image config can contain any image mark properties (except ``type``, ``style``, and ``clip``). \ No newline at end of file From 6f26d9bbb5941f5bee0763e5ff40b034c5a138fd Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:23:55 -0500 Subject: [PATCH 11/18] line subpage --- doc/user_guide/marks/line.rst | 295 ++++++++++++++++++++++++++++++++++ 1 file changed, 295 insertions(+) create mode 100644 doc/user_guide/marks/line.rst diff --git a/doc/user_guide/marks/line.rst b/doc/user_guide/marks/line.rst new file mode 100644 index 000000000..34a78a8f0 --- /dev/null +++ b/doc/user_guide/marks/line.rst @@ -0,0 +1,295 @@ +.. currentmodule:: altair + +.. _user-guide-line-marks: + +Line +~~~~ +The ``line`` mark represents the data points stored in a field with a line connecting all of these points. Line marks are commonly used to depict trajectories or change over time. Unlike most other marks that represent one data element per mark, one line mark represents multiple data elements as a single line, akin to ``area`` and ``trail``. + +Note: For line segments that connect (x,y) positions to (x2,y2) positions, please use ``rule`` marks. For continuous lines with varying size, please use ``trail`` marks. 
+ +Examples +-------- +Line Chart +^^^^^^^^^^ +Using ``line`` with one temporal or ordinal field (typically on ``x``) and another quantitative field (typically on ``y``) produces a simple line chart with a single line. + +.. altair-plot:: + import altair as alt + from altair import datum + from vega_datasets import data + + + source = data.stocks() + + alt.Chart(source).mark_line().encode( + x='date', + y='price', + ).transform_filter( + datum.symbol == 'GOOG') + +We can create multiple lines by grouping along different attributes, such as ``color`` or ``detail``. + +Multi-series Colored Line Chart +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Adding a field to a mark property channel such as ``color`` groups data points into different series, producing a multi-series colored line chart. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line().encode( + x='date', + y='price', + color='symbol', + ) + +We can further apply a selection to highlight a certain line on hover. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + highlight = alt.selection(type='single', on='mouseover', + fields=['symbol'], nearest=True) + + base = alt.Chart(source).encode( + x='date:T', + y='price:Q', + color='symbol:N' + ) + + points = base.mark_circle().encode( + opacity=alt.value(0) + ).add_selection( + highlight + ).properties( + width=600 + ) + + lines = base.mark_line().encode( + size=alt.condition(~highlight, alt.value(1), alt.value(3)) + ) + + points + lines + +Multi-series Line Chart with Varying Dashes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Adding a field to ``strokeDash`` also produces a multi-series line chart. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line().encode( + x='date', + y='price', + strokeDash='symbol', + ) + +We can also use line grouping to create a line chart that has multiple parts with varying styles. + +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'a' : ['A', 'B', 'D', 'E', 'E', 'G', 'H'], + 'b' : [28, 55, 91, 81, 81, 19, 87], + 'predicted' : [False, False, False, False, True, True, True] + }) + + alt.Chart(source).mark_line().encode( + x = 'a:O', + y = 'b:Q', + strokeDash = 'predicted:N' + ) + +Multi-series Line Chart with the Detail Channel +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +To group lines by a field without mapping the field to any visual properties, we can map the field to the ``detail`` channel to create a multi-series line chart with the same color. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line().encode( + x='date', + y='price', + detail='symbol', + ) + +The same method can be used to group lines for a ranged dot plot. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.countries() + + base = alt.Chart(source).encode( + alt.X('life_expect:Q', title= 'Life Expectancy (years)', scale=alt.Scale(zero=False)), + alt.Y('country:N', title = 'Country', axis = alt.Axis(offset = 5, ticks = False, minExtent = 70, domain = False)), + ).transform_filter( + alt.FieldOneOfPredicate(field = 'country', + oneOf = ["China", "India", "United States", "Indonesia", "Brazil"]) + ) + + line = base.mark_line().encode( + detail = 'country', + color = alt.value("#db646f") + ).transform_filter( + alt.FieldOneOfPredicate(field = 'year', oneOf = [1995, 2000]) + ) + + point = base.mark_point(filled = True).encode( + alt.Color(field = 'year', + scale = alt.Scale(range = ["#e6959c", "#911a24"], domain = [1995, 2000])), + size = alt.value(100), + opacity = alt.value(1) + ) + + line + point + +Line Chart with Point Markers +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +By setting the ``point`` property of the mark definition to ``true`` or an object defining a property of the overlaying point marks, we can overlay point markers on top of the line. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line(point=True).encode( + x='year(date)', + y='mean(price):Q', + color='symbol:N' + ) + +This is equivalent to adding another layer of filled point marks. + +Note that the overlay point marks have ``opacity`` = 1 by default (instead of semi-transparent like normal point marks). + +Here we create stroked points by setting their ``filled`` to ``false`` and their ``fill`` to ``"white"``. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line(point= alt.OverlayMarkDef(filled = False, fill = 'white')).encode( + x='year(date)', + y='mean(price):Q', + color='symbol:N' + ) + +Connected Scatter Plot (Line Chart with Custom Path) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The line’s path (order of points in the line) is determined by data values on the temporal/ordinal field by default. However, a field can be mapped to the ``order`` channel for determining a custom path. + +For example, to show a pattern of data change over time between gasoline price and average miles driven per capita we use ``order`` channel to sort the points in the line by time field (year). In this example, we also use the ``point`` property to overlay point marks over the line marks to highlight each data point. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.driving() + + alt.Chart(source).mark_line(point=True).encode( + alt.X('miles', scale=alt.Scale(zero=False)), + alt.Y('gas', scale=alt.Scale(zero=False)), + order='year' + ) + +Line interpolation +^^^^^^^^^^^^^^^^^^ +The ``interpolate`` property of a mark definition can be used to change line interpolation method. For example, we can set ``interpolate`` to ``"monotone"``. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line(interpolate='monotone').encode( + x='date', + y='price' + ).transform_filter( + alt.datum.symbol == 'GOOG' + ) + +We can also set ``interpolate`` to ``"step-after"`` to create a step-chart. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_line(interpolate='step-after').encode( + x='date', + y='price' + ).transform_filter( + alt.datum.symbol == 'GOOG' + ) + +Geo Line +^^^^^^^^ +By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can draw lines through geographic points. + + +.. altair-plot:: + import altair as alt + from vega_datasets import data + import pandas as pd + + airports = data.airports.url + flights_airport = data.flights_airport.url + + states = alt.topo_feature(data.us_10m.url, feature="states") + + lookup_data = alt.LookupData( + airports, key="iata", fields=["state", "latitude", "longitude"] + ) + + source = pd.DataFrame({ + 'airport' : ['SEA', 'SFO', 'LAX', 'LAS', 'DFW', 'DEN', 'ORD', 'JFK'], + 'order' : [1, 2, 3, 4, 5, 6, 7, 8], + }) + + background = alt.Chart(states).mark_geoshape( + fill="lightgray", + stroke="white" + ).properties( + width=750, + height=500 + ).project("albersUsa") + + line = alt.Chart(source).mark_line().encode( + latitude="latitude:Q", + longitude="longitude:Q", + order = 'order' + ).transform_lookup( + lookup = 'airport', + from_ = lookup_data + ) + + background + line + +Line Config +^^^^^^^^^^^ + +The ``line`` property of the top-level ``config`` object sets the default properties for all line marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The line config can contain any line mark properties (except ``type``, ``style``, and ``clip``). 
\ No newline at end of file From 471c1d6bf2b121d3ebc5076b61662cfcaf3eb002 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:25:19 -0500 Subject: [PATCH 12/18] point subpage --- doc/user_guide/marks/point.rst | 164 +++++++++++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 doc/user_guide/marks/point.rst diff --git a/doc/user_guide/marks/point.rst b/doc/user_guide/marks/point.rst new file mode 100644 index 000000000..5b1c70058 --- /dev/null +++ b/doc/user_guide/marks/point.rst @@ -0,0 +1,164 @@ +.. currentmodule:: altair + +.. _user-guide-point-marks: + +Point +~~~~~ +``point`` mark represents each data point with a symbol. Point marks are commonly used in visualizations like scatterplots. + +Examples +-------- +Dot Plot +^^^^^^^^ +Mapping a field to either only ``x`` or only ``y`` of point marks creates a dot plot. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.movies() + alt.Chart(source).mark_point().encode( + x = 'IMDB_Rating:Q' + ) + +Scatter Plot +^^^^^^^^^^^^ +Mapping fields to both the ``x`` and ``y`` channels creates a scatter plot. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_point().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q' + ) + +By default, ``point`` marks only have borders and are transparent inside. You can create a filled point by setting ``filled`` to ``true``. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_point(filled = True).encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q' + ) + +Bubble Plot +^^^^^^^^^^^ +By mapping a third field to the ``size`` channel in the scatter plot, we can create a bubble plot instead. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_point().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q', + size = 'Acceleration:Q' + ) + +Scatter Plot with Color and/or Shape +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Fields can also be encoded in the scatter plot using the ``color`` or ``shape`` channels. For example, this specification encodes the field ``species`` with both ``color`` and ``shape``. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + from palmerpenguins import load_penguins + + source = load_penguins() + + alt.Chart(source).mark_point().encode( + alt.X('flipper_length_mm:Q', scale = alt.Scale(zero = False)), + alt.Y('body_mass_g:Q', scale = alt.Scale(zero = False)), + color = 'species:N', + shape = 'species:N' + ) + +Dot Plot with Jittering +^^^^^^^^^^^^^^^^^^^^^^^ +To jitter points on a discrete scale, you can add a random offset: + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_point().encode( + x = 'Horsepower:Q', + y = 'Cylinders:O', + yOffset = 'random:Q' + ).transform_calculate( + random = 'random()' + ).properties( + height = alt.Step(50) + ) + +Wind Vector Example +^^^^^^^^^^^^^^^^^^^ +We can also use the ``point`` mark with ``wedge`` as ``shape`` and ``angle`` encoding to create a wind vector map. Other shape options are: +``"circle"``, ``"square"``, ``"cross"``, ``"diamond"``, ``"triangle-up"``, ``"triangle-down"``, ``"triangle-right"``, ``"triangle-left"``, ``"stroke"``, ``"arrow"``, and ``"triangle"``. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.windvectors() + + alt.Chart(source).mark_point(shape="wedge", filled=True).encode( + latitude="latitude", + longitude="longitude", + color=alt.Color( + "dir", scale=alt.Scale(domain=[0, 360], scheme="rainbow"), legend=None + ), + angle=alt.Angle("dir", scale=alt.Scale(domain=[0, 360], range=[180, 540])), + size=alt.Size("speed", scale=alt.Scale(rangeMax=500)), + ).project("equalEarth") + +Geo Point +^^^^^^^^^ +By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can visualize geographic points. The example below shows major airports in the US. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + airports = data.airports() + states = alt.topo_feature(data.us_10m.url, feature='states') + + # US states background + background = alt.Chart(states).mark_geoshape( + fill='lightgray', + stroke='white' + ).properties( + width=500, + height=300 + ).project('albersUsa') + + # airport positions on background + points = alt.Chart(airports).mark_circle( + size=10, + color='steelblue' + ).encode( + longitude='longitude:Q', + latitude='latitude:Q', + tooltip=['name', 'city', 'state'] + ) + + background + points + +Point Config +^^^^^^^^^^^^ +The ``point`` property of the top-level ``config`` object sets the default properties for all point marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The point config can contain any point mark properties (except ``type``, ``style``, and ``clip``). 
\ No newline at end of file From b1b9b210ebb878d1a7bbef0d9ae2671bed547fcb Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:25:51 -0500 Subject: [PATCH 13/18] rect subpage --- doc/user_guide/marks/rect.rst | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 doc/user_guide/marks/rect.rst diff --git a/doc/user_guide/marks/rect.rst b/doc/user_guide/marks/rect.rst new file mode 100644 index 000000000..1a5043a92 --- /dev/null +++ b/doc/user_guide/marks/rect.rst @@ -0,0 +1,58 @@ +.. currentmodule:: altair + +.. _user-guide-rect-marks: + +Rect +~~~~ + +The ``rect`` mark represents an arbitrary rectangle. + +Examples +-------- +Heatmap +^^^^^^^ + +Using the ``rect`` marks with discrete fields on ``x`` and ``y`` channels creates a heatmap. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.seattle_weather() + + alt.Chart(source).mark_rect().encode( + alt.X('date(date):O', title = 'Day', axis = alt.Axis(labelAngle = 0, format = '%e')), + alt.Y('month(date):O', title = 'Month'), + alt.Color('max(temp_max):Q', title = "Max Temp") + ) + +Ranged Rectangles +^^^^^^^^^^^^^^^^^ +Specifying both ``x`` and ``x2`` and/or ``y`` and ``y2`` creates a rectangle that spans over certain x and/or y values. + +For example, we can use ``rect`` to create an annotation ``layer`` that provides a shading between global ``min`` and ``max`` values. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + point = alt.Chart(source).mark_point().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q' + ) + + rect = alt.Chart(source).mark_rect().encode( + y = 'max(Miles_per_Gallon)', + y2 = 'min(Miles_per_Gallon)', + opacity = alt.value(0.2) + ) + + point + rect + +Rect Config +^^^^^^^^^^^ +The ``rect`` property of the top-level ``config`` object sets the default properties for all rect marks. 
If mark property encoding channels are specified for marks, these config values will be overridden. + +The rect config can contain any rect mark properties (except ``type``, ``style``, and ``clip``). \ No newline at end of file From a490c8ce9ad0dd56fe41272fdf5ef98685dcc3f6 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:26:12 -0500 Subject: [PATCH 14/18] rule subpage --- doc/user_guide/marks/rule.rst | 97 +++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 doc/user_guide/marks/rule.rst diff --git a/doc/user_guide/marks/rule.rst b/doc/user_guide/marks/rule.rst new file mode 100644 index 000000000..07a1bc2bd --- /dev/null +++ b/doc/user_guide/marks/rule.rst @@ -0,0 +1,97 @@ +.. currentmodule:: altair + +.. _user-guide-rule-marks: + +Rule +~~~~ +The ``rule`` mark represents each data point as a line segment. It can be used in two ways. First, as a line segment that spans the complete width or height of a view. Second, a rule can be used to draw a line segment between two positions. + +Examples +-------- +Width/Height-Spanning Rules +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If the ``rule`` mark only has ``y`` encoding, the output view produces horizontal rules that span the complete width. Similarly, if the ``rule`` mark only has ``x`` encoding, the output view produces vertical rules that span the height. + +We can use rules to show the average price of different stocks akin to ``tick`` marks. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_rule().encode( + y = 'mean(price):Q', + size = alt.value(2), + color = 'symbol:N' + ) + +The fact that rule marks span the width or the height of a single view makes them useful as an annotation layer. For example, we can use rules to show average values of different stocks alongside the price curve. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + base = alt.Chart(source).properties(width=550) + + line = base.mark_line().encode( + x='date', + y='price', + color='symbol' + ) + + rule = base.mark_rule().encode( + y='average(price)', + color='symbol', + size=alt.value(2) + ) + + line + rule + +We can also use a rule mark to show the global mean value over a histogram. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.movies.url + + base = alt.Chart(source) + + bar = base.mark_bar().encode( + x=alt.X('IMDB_Rating:Q', bin=True, axis=None), + y='count()' + ) + + rule = base.mark_rule(color='red').encode( + x='mean(IMDB_Rating):Q', + size=alt.value(5) + ) + + bar + rule + +Ranged Rules +^^^^^^^^^^^^ +To control the spans of horizontal/vertical rules, ``x`` and ``x2``/ ``y`` and ``y2`` channels can be specified. + +For example, we can use ``y`` and ``y2`` to show the ``"min"`` and ``"max"`` values of horsepower for cars from different locations. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_rule().encode( + x = 'Origin', + y = 'min(Horsepower)', + y2 = 'max(Horsepower)' + ) + +Rule Config +^^^^^^^^^^^ +The ``rule`` property of the top-level ``config`` object sets the default properties for all rule marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The rule config can contain any rule mark properties (except ``type``, ``style``, and ``clip``). 
\ No newline at end of file From 82e715d1475c21b0e05471893d7ac3a619dbf1b2 Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:26:42 -0500 Subject: [PATCH 15/18] square subpage --- doc/user_guide/marks/square.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 doc/user_guide/marks/square.rst diff --git a/doc/user_guide/marks/square.rst b/doc/user_guide/marks/square.rst new file mode 100644 index 000000000..8d4674f98 --- /dev/null +++ b/doc/user_guide/marks/square.rst @@ -0,0 +1,26 @@ +.. currentmodule:: altair + +.. _user-guide-square-marks: + +Square +~~~~~~ +The ``square`` mark is similar to the ``point`` mark, except that (1) the ``shape`` value is always set to ``square`` and (2) it is filled by default. + +Scatterplot with Square +----------------------- +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_square().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q' + ) + +Square Config +^^^^^^^^^^^^^ +The ``square`` property of the top-level ``config`` object sets the default properties for all square marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The square config can contain any point mark properties (except ``type``, ``style``, and ``clip``). \ No newline at end of file From fa375a2f6c9a470ea01e610a8ed8d7d1918b65fb Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:27:22 -0500 Subject: [PATCH 16/18] text subpage --- doc/user_guide/marks/text.rst | 127 ++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 doc/user_guide/marks/text.rst diff --git a/doc/user_guide/marks/text.rst b/doc/user_guide/marks/text.rst new file mode 100644 index 000000000..612c207c7 --- /dev/null +++ b/doc/user_guide/marks/text.rst @@ -0,0 +1,127 @@ +.. currentmodule:: altair + +.. 
_user-guide-text-marks: + +Text +~~~~~~ +``text`` mark represents each data point with a text instead of a point. + +Examples +-------- +Text Table Heatmap +^^^^^^^^^^^^^^^^^^ +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + base = alt.Chart(source).transform_aggregate( + num_cars='count()', + groupby=['Origin', 'Cylinders'] + ).encode( + alt.X('Cylinders:O', scale=alt.Scale(paddingInner=0)), + alt.Y('Origin:O', scale=alt.Scale(paddingInner=0)), + ) + + heatmap = base.mark_rect().encode( + color=alt.Color('num_cars:Q', + scale=alt.Scale(scheme='viridis'), + legend=alt.Legend(direction='horizontal') + ) + ) + + text = base.mark_text(baseline='middle').encode( + text='num_cars:Q', + color=alt.condition( + alt.datum.num_cars > 100, + alt.value('black'), + alt.value('white') + ) + ) + + heatmap + text + +Labels +^^^^^^ +You can also use ``text`` marks as labels for other marks and set offset (``dx`` or ``dy``), ``align``, and ``baseline`` properties of the mark definition. + +.. altair-plot:: + import altair as alt + import pandas as pd + + source = pd.DataFrame({ + 'a' : ['A', 'B', 'C'], + 'b' : [28, 55, 43] + }) + + bar = alt.Chart(source).mark_bar().encode( + y = 'a:N', + x = alt.X('b:Q', scale = alt.Scale(domain = [0,60])) + ) + + text = bar.mark_text( + align = 'left', + baseline = 'middle', + dx = 3 + ).encode( + text = 'b' + ) + + bar + text + +Scatterplot with Text +^^^^^^^^^^^^^^^^^^^^^ +Mapping a field to ``text`` channel of text mark sets the mark’s text value. For example, we can make a colored scatterplot with text marks showing the initial character of its origin, instead of ``point`` marks. + +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + from altair import datum + + source = data.cars() + + alt.Chart(source).mark_text().encode( + x = 'Horsepower:Q', + y = 'Miles_per_Gallon:Q', + color = 'Origin:N', + text = 'Origin[0]:N' + ) + +Geo Text +^^^^^^^^ +By mapping geographic coordinate data to ``longitude`` and ``latitude`` channels of a corresponding projection, we can show text at accurate locations. The example below shows the name of every US state capital at the location of the capital. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + states = alt.topo_feature(data.us_10m.url, feature="states") + + source = data.us_state_capitals() + + background = alt.Chart(states).mark_geoshape( + fill="lightgray", + stroke="white" + ).properties( + width=750, + height=500 + ).project("albersUsa") + + line = alt.Chart(source).mark_text(dy = -10).encode( + latitude="lat:Q", + longitude="lon:Q", + text = 'city:N' + ) + + point = alt.Chart(source).mark_circle().encode( + latitude="lat:Q", + longitude="lon:Q", + color = alt.value('orange') + ) + + background + line + point + +Text Config +^^^^^^^^^^^ +The ``text`` property of the top-level ``config`` object sets the default properties for all text marks. If mark property encoding channels are specified for marks, these config values will be overridden. \ No newline at end of file From 3a2a587455e1fc5818d7f6a48de197fcc8cea47f Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:27:50 -0500 Subject: [PATCH 17/18] tick subpage --- doc/user_guide/marks/tick.rst | 58 +++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 doc/user_guide/marks/tick.rst diff --git a/doc/user_guide/marks/tick.rst b/doc/user_guide/marks/tick.rst new file mode 100644 index 000000000..dfc655b59 --- /dev/null +++ b/doc/user_guide/marks/tick.rst @@ -0,0 +1,58 @@ +.. currentmodule:: altair + +.. 
_user-guide-tick-marks: + +Tick +~~~~ +The ``tick`` mark represents each data point as a short line. This is a useful mark for displaying the distribution of values in a field. + +Examples +-------- +Dot Plot +^^^^^^^^ +The following dot plot uses tick marks to show the distribution of precipitation in Seattle. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.seattle_weather() + + alt.Chart(source).mark_tick().encode( + x='precipitation:Q' + ) +Strip Plot +^^^^^^^^^^ +By adding a ``y`` field, a strip plot can be created that shows the distribution of horsepower by number of cylinders. + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.cars() + + alt.Chart(source).mark_tick().encode( + x='Horsepower:Q', + y='Cylinders:O' + ) + +Tick Config +^^^^^^^^^^^ +The ``tick`` property of the top-level ``config`` object sets the default properties for all tick marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +Besides standard mark config properties, tick config can contain the following additional properties: ``bandSize`` and ``thickness``. + +Customizing Tick’s Size and Thickness +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. 
altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.seattle_weather() + + alt.Chart(source).mark_tick().encode( + x='precipitation:Q' + ).configure_tick( + thickness = 2, + bandSize = 10 + ) \ No newline at end of file From 18080401a6f60bef5f1a5acada0803187ff1ef5b Mon Sep 17 00:00:00 2001 From: Hannah Barton Date: Thu, 26 May 2022 15:28:22 -0500 Subject: [PATCH 18/18] trail subpage --- doc/user_guide/marks/trail.rst | 57 ++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 doc/user_guide/marks/trail.rst diff --git a/doc/user_guide/marks/trail.rst b/doc/user_guide/marks/trail.rst new file mode 100644 index 000000000..16278b1b9 --- /dev/null +++ b/doc/user_guide/marks/trail.rst @@ -0,0 +1,57 @@ +.. currentmodule:: altair + +.. _user-guide-trail-marks: + +Trail +~~~~~ +The ``trail`` mark represents the data points of a field as a line connecting all of them. It is similar to the ``line`` mark, but a trail's width can vary along its length according to the backing data. Unlike lines, trails do not support different interpolation methods, and they use ``fill`` (not ``stroke``) for their color. Trail marks are useful if you want to draw lines whose size changes to reflect the underlying data. + +Examples +-------- +A Line Chart with varying size using ``trail`` mark +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. altair-plot:: + import altair as alt + from vega_datasets import data + + source = data.stocks() + + alt.Chart(source).mark_trail().encode( + x='date', + y='price', + color='symbol', + size='price', + ) + +A Comet Chart showing changes between two states +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. 
altair-plot:: + import altair as alt + import vega_datasets + + ( + alt.Chart(vega_datasets.data.barley.url) + .transform_pivot("year", value="yield", groupby=["variety", "site"]) + .transform_fold(["1931", "1932"], as_=["year", "yield"]) + .transform_calculate(calculate="datum['1932'] - datum['1931']", as_="delta") + .mark_trail() + .encode( + x=alt.X('year:O', title=None), + y=alt.Y('variety:N', title='Variety'), + size=alt.Size('yield:Q', scale=alt.Scale(range=[0, 12]), legend=alt.Legend(values=[20, 60], title='Barley Yield (bushels/acre)')), + color=alt.Color('delta:Q', scale=alt.Scale(domainMid=0), legend=alt.Legend(title='Yield Delta (%)')), + tooltip=alt.Tooltip(['year:O', 'yield:Q']), + column=alt.Column('site:N', title='Site') + + ) + .configure_view(stroke=None) + .configure_legend(orient='bottom', direction='horizontal') + .properties(title='Barley Yield comparison between 1932 and 1931') + ) + +Trail Config +^^^^^^^^^^^^ +The ``trail`` property of the top-level ``config`` object sets the default properties for all trail marks. If mark property encoding channels are specified for marks, these config values will be overridden. + +The trail config can contain any trail mark properties (except ``type``, ``style``, and ``clip``). \ No newline at end of file