diff --git a/docs/images/plot_strike_zone_cease_SL.png b/docs/images/plot_strike_zone_cease_SL.png new file mode 100644 index 00000000..04321b37 Binary files /dev/null and b/docs/images/plot_strike_zone_cease_SL.png differ diff --git a/docs/images/plot_strike_zone_cease_all.png b/docs/images/plot_strike_zone_cease_all.png new file mode 100644 index 00000000..87ced058 Binary files /dev/null and b/docs/images/plot_strike_zone_cease_all.png differ diff --git a/docs/images/plot_strike_zone_marsh.png b/docs/images/plot_strike_zone_marsh.png new file mode 100644 index 00000000..5a39560e Binary files /dev/null and b/docs/images/plot_strike_zone_marsh.png differ diff --git a/docs/plotting.md b/docs/plotting.md index 30189ada..73636f1c 100644 --- a/docs/plotting.md +++ b/docs/plotting.md @@ -131,4 +131,61 @@ plot_teams(data, "HR", "BB") ``` ![Plot Teams_2023_BB/HR](images/plot_teams_example.png) +--- +`plot_strike_zone(data: pd.DataFrame, title: str = '', colorby: str = 'pitch_type', legend_title: str = '', + annotation: str = 'pitch_type', axis: Optional[axes.Axes] = None) -> axes.Axes:` + +## Arguments + +`data`: StatCast pandas.DataFrame of StatCast pitcher data + +`title`: (str), default = '' Optional: Title of plot + +`colorby`: (str), default = 'pitch_type', Optional: Which category to color the mark with. 'pitch_type', 'pitcher', 'description' or a column within data + +`legend_title`: (str), default = based on colorby, Optional: Title for the legend + +`annotation`: (str), default = 'pitch_type', Optional: What to annotate in the marker. 'pitch_type', 'release_speed', 'effective_speed', 'launch_speed', or something else in the data + +`axis`: (matplotlib.axes.Axes), default = None, Optional: Axes to plot the strike zone on. 
If None, a new Axes will be created + +# Examples + +```python +from pybaseball.plotting import plot_strike_zone +from pybaseball import statcast_pitcher + +data = statcast_pitcher('2022-09-03', '2022-09-03', 656302) + +plot_strike_zone(data, title = "Dylan Cease's 1-hitter on Sept 3, 2022") + +``` + +![Strike zone with Dylan Cease's pitches from 2022-09-03 overlaid](images/plot_strike_zone_cease_all.png) + +```python +from pybaseball.plotting import plot_strike_zone +from pybaseball import statcast_pitcher + +data = statcast_pitcher('2022-09-03', '2022-09-03', 656302) + +plot_strike_zone(data.loc[data["pitch_type"] == "SL"], title = "Exit Velocities on Dylan Cease's Slider", colorby='description', annotation="launch_speed") + +``` + +![Strike zone with Dylan Cease's Slider from 2022-09-03 overlaid](images/plot_strike_zone_cease_SL.png) + + +```python +from pybaseball.plotting import plot_strike_zone +from pybaseball import statcast + +data = statcast('2023-04-23', '2023-04-23') + +plot_strike_zone(data.loc[data["batter"] == 669016], title = "Brandon Marsh's Three True Outcome Day", colorby='pitcher', annotation="description") + +``` + +![Strike zone with Brandon Marsh's Three True Outcome Day overlaid](images/plot_strike_zone_marsh.png) + ` \ No newline at end of file diff --git a/pybaseball/__init__.py b/pybaseball/__init__.py index bcb81659..1089d0a7 100644 --- a/pybaseball/__init__.py +++ b/pybaseball/__init__.py @@ -95,6 +95,7 @@ from .plotting import plot_stadium from .plotting import spraychart from .plotting import plot_teams +from .plotting import plot_strike_zone from .datasources.fangraphs import (fg_batting_data, fg_pitching_data, fg_team_batting_data, fg_team_fielding_data, fg_team_pitching_data) from .split_stats import get_splits diff --git a/pybaseball/plotting.py b/pybaseball/plotting.py index c3812413..32eb62c4 100644 --- a/pybaseball/plotting.py +++ b/pybaseball/plotting.py @@ -11,6 +11,8 @@ import numpy as np import pandas as pd +from 
def plot_strike_zone(data: pd.DataFrame, title: str = '', colorby: str = 'pitch_type', legend_title: str = '',
                     annotation: str = 'pitch_type', axis: Optional[axes.Axes] = None) -> axes.Axes:
    """
    Produces a plot of pitches overlaid on a strike zone using StatCast data

    Args:
        data: (pandas.DataFrame)
            StatCast pandas.DataFrame of StatCast pitcher data. Must contain 'plate_x' and 'plate_z',
            plus whichever columns `colorby` and `annotation` resolve to
        title: (str), default = ''
            Optional: Title of plot
        colorby: (str), default = 'pitch_type'
            Optional: Which category to color the mark with. 'pitch_type', 'pitcher', 'description',
            'events' or a column within data. With 'events', pitches without an event are not plotted
        legend_title: (str), default = based on colorby
            Optional: Title for the legend
        annotation: (str), default = 'pitch_type'
            Optional: What to annotate in the marker. 'pitch_type', 'release_speed', 'effective_speed',
            'launch_speed', or something else in the data. An empty string disables annotations
        axis: (matplotlib.axes.Axes), default = None
            Optional: Axes to plot the strike zone on. If None, a new Axes will be created
    Returns:
        A matplotlib.axes.Axes object that was used to generate the pitches overlaid on the strike zone
    """

    # make the markers really visible when there are fewer pitches;
    # an empty frame is treated like a single pitch so we never divide by zero
    n_pitches = max(data.shape[0], 1)
    alpha_markers = min(0.8, 0.5 + 1 / n_pitches)
    alpha_text = alpha_markers + 0.2

    # define Matplotlib figure and axis
    if axis is None:
        _, axis = plt.subplots()

    # add home plate to plot
    home_plate_coords = [[-0.71, 0], [-0.85, -0.5], [0, -1], [0.85, -0.5], [0.71, 0]]
    axis.add_patch(patches.Polygon(home_plate_coords,
                                   edgecolor='darkgray',
                                   facecolor='lightgray',
                                   zorder=0.1))

    # add strike zone to plot, technically the y coords can vary by batter
    axis.add_patch(patches.Rectangle((-0.71, 1.5), 2 * 0.71, 2,
                                     edgecolor='lightgray',
                                     fill=False,
                                     lw=3,
                                     zorder=0.1))

    # work on a private copy so the caller's frame is never modified
    sub_data = data.copy().reset_index(drop=True)

    # resolve which column drives the colors and what the legend is called by default
    if colorby == 'pitch_type':
        color_label = 'pitch_type'
        default_legend_title = 'Pitch Type'
    elif colorby == 'description':
        sub_data['desc'] = sub_data['description'].str.replace('_', ' ').str.title()
        color_label = 'desc'
        default_legend_title = 'Pitch Description'
    elif colorby == 'pitcher':
        color_label = 'player_name'
        default_legend_title = 'Pitcher'
    elif colorby == 'events':
        # only things where something happened; copy so the column assignment
        # below does not raise SettingWithCopyWarning on a filtered slice
        sub_data = sub_data[sub_data['events'].notna()].copy()
        sub_data['event'] = sub_data['events'].str.replace('_', ' ').str.title()
        color_label = 'event'
        default_legend_title = 'Outcome'
    else:
        color_label = colorby
        default_legend_title = colorby

    if not legend_title:
        legend_title = default_legend_title

    numeric_annotations = ("release_speed", "effective_speed", "launch_speed")

    scatters = []
    # missing categories can never match the equality filter below, so drop them
    # up front instead of adding an empty "nan" entry to the legend
    for color in sub_data[color_label].dropna().unique():
        color_sub_data = sub_data[sub_data[color_label] == color]

        label = color
        if color_label == 'pitch_type':
            # fall back to the raw code for pitch types the map does not know
            try:
                label = pitch_code_to_name_map[color]
            except KeyError:
                label = color

        scatters.append(axis.scatter(
            color_sub_data["plate_x"],
            color_sub_data["plate_z"],
            s=10 ** 2,
            label=label,
            alpha=alpha_markers
        ))

        # add an annotation at the center of the marker
        if annotation:
            for plate_x, plate_z, value in zip(color_sub_data["plate_x"],
                                               color_sub_data["plate_z"],
                                               color_sub_data[annotation]):
                if pd.isna(value):
                    label_formatted = ""
                elif annotation in numeric_annotations:
                    # these are numbers, format them that way
                    label_formatted = "{:.0f}".format(value)
                else:
                    label_formatted = value

                axis.annotate(label_formatted,
                              (plate_x, plate_z),
                              size=7,
                              ha='center',
                              va='center',
                              alpha=alpha_text)

    axis.set_xlim(-4, 4)
    axis.set_ylim(-1.5, 7)
    axis.axis('off')

    axis.legend(handles=scatters, title=legend_title, bbox_to_anchor=(0.7, 1), loc='upper left')

    # title the axes we actually drew on; pyplot's "current" axes may be a
    # different one when the caller supplied `axis`
    axis.set_title(title)
    plt.show()

    return axis
"Slider", "Splitter", "2-Seamer", "Sweeper", "Slurve", "Sinker", "Curveball"] pitch_names_upper = [p.upper() for p in pitch_names] # including all the codes to themselves makes this simpler later