Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added plot_strike_zone to plotting.py #353

Merged
merged 3 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/images/plot_strike_zone_cease_SL.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plot_strike_zone_cease_all.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/images/plot_strike_zone_marsh.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 57 additions & 0 deletions docs/plotting.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,61 @@ plot_teams(data, "HR", "BB")
```
![Plot Teams_2023_BB/HR](images/plot_teams_example.png)

---
`plot_strike_zone(data: pd.DataFrame, title: str = '', colorby: str = 'pitch_type', legend_title: str = '',
annotation: str = 'pitch_type', axis: Optional[axes.Axes] = None) -> axes.Axes:`

## Arguments

`data`: (pandas.DataFrame) StatCast pitch-level data, e.g. as returned by `statcast_pitcher` or `statcast`

`title`: (str), default = '' Optional: Title of plot

`colorby`: (str), default = 'pitch_type', Optional: Which category to color the markers by. 'pitch_type', 'pitcher', 'description', 'events', or any other column within data

`legend_title`: (str), default = based on colorby, Optional: Title for the legend

`annotation`: (str), default = 'pitch_type', Optional: What to annotate in the marker. 'pitch_type', 'release_speed', 'effective_speed', 'launch_speed', or something else in the data

`axis`: (matplotlib.axes.Axes), default = None, Optional: Axes to plot the strike zone on. If None, a new Axes will be created

# Examples

```python
from pybaseball.plotting import plot_strike_zone
from pybaseball import statcast_pitcher

data = statcast_pitcher('2022-09-03', '2022-09-03', 656302)

plot_strike_zone(data, title = "Dylan Cease's 1-hitter on Sept 3, 2022")

```

![Strike zone with Dylan Cease's pitches from 2022-09-03 overlaid](images/plot_strike_zone_cease_all.png)

```python
from pybaseball.plotting import plot_strike_zone
from pybaseball import statcast_pitcher

data = statcast_pitcher('2022-09-03', '2022-09-03', 656302)

plot_strike_zone(data.loc[data["pitch_type"] == "SL"], title = "Exit Velocities on Dylan Cease's Slider", colorby='description', annotation="launch_speed")

```

![Strike zone with Dylan Cease's Slider from 2022-09-03 overlaid](images/plot_strike_zone_cease_SL.png)


```python
from pybaseball.plotting import plot_strike_zone
from pybaseball import statcast

data = statcast('2023-04-23', '2023-04-23')

plot_strike_zone(data.loc[data["batter"] == 669016], title = "Brandon Marsh's Three True Outcome Day", colorby='pitcher', annotation="description")

```

![Strike zone with Brandon Marsh's Three True Outcome Day overlaid](images/plot_strike_zone_marsh.png)
1 change: 1 addition & 0 deletions pybaseball/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
from .plotting import plot_stadium
from .plotting import spraychart
from .plotting import plot_teams
from .plotting import plot_strike_zone
from .datasources.fangraphs import (fg_batting_data, fg_pitching_data, fg_team_batting_data, fg_team_fielding_data,
fg_team_pitching_data)
from .split_stats import get_splits
Expand Down
126 changes: 126 additions & 0 deletions pybaseball/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import numpy as np
import pandas as pd

from pybaseball.utils import pitch_code_to_name_map

CUR_PATH = Path(__file__).resolve().parent


Expand Down Expand Up @@ -171,6 +173,130 @@ def spraychart(data: pd.DataFrame, team_stadium: str, title: str = '', tooltips:
return base


def plot_strike_zone(data: pd.DataFrame, title: str = '', colorby: str = 'pitch_type', legend_title: str = '',
                     annotation: str = 'pitch_type', axis: Optional[axes.Axes] = None) -> axes.Axes:
    """
    Plot pitches overlaid on a strike zone using StatCast data.

    Args:
        data: (pandas.DataFrame)
            StatCast pitch-level data. Must contain 'plate_x' and 'plate_z', plus the columns
            that `colorby` and `annotation` resolve to.
        title: (str), default = ''
            Optional: Title of plot
        colorby: (str), default = 'pitch_type'
            Optional: Which category to color the markers by. 'pitch_type', 'pitcher' (uses the
            'player_name' column), 'description', 'events' (only pitches with a non-null event
            are drawn), or any other column within data
        legend_title: (str), default = based on colorby
            Optional: Title for the legend
        annotation: (str), default = 'pitch_type'
            Optional: What to annotate in the marker. 'pitch_type', 'release_speed', 'effective_speed',
            'launch_speed', or something else in the data. Pass '' to disable annotations.
        axis: (matplotlib.axes.Axes), default = None
            Optional: Axes to plot the strike zone on. If None, a new Axes will be created
    Returns:
        A matplotlib.axes.Axes object that was used to generate the pitches overlaid on the strike zone
    """

    # Make the markers more opaque when there are fewer pitches. max(..., 1) keeps an
    # empty DataFrame from raising ZeroDivisionError; it then just draws the bare zone.
    alpha_markers = min(0.8, 0.5 + 1 / max(data.shape[0], 1))
    alpha_text = alpha_markers + 0.2

    # define Matplotlib figure and axis
    if axis is None:
        _, axis = plt.subplots()

    # add home plate to plot
    home_plate_coords = [[-0.71, 0], [-0.85, -0.5], [0, -1], [0.85, -0.5], [0.71, 0]]
    axis.add_patch(patches.Polygon(home_plate_coords,
                                   edgecolor='darkgray',
                                   facecolor='lightgray',
                                   zorder=0.1))

    # add strike zone to plot, technically the y coords can vary by batter
    axis.add_patch(patches.Rectangle((-0.71, 1.5), 2 * 0.71, 2,
                                     edgecolor='lightgray',
                                     fill=False,
                                     lw=3,
                                     zorder=0.1))

    # Work on a private copy so derived columns never touch the caller's frame
    # (and to avoid SettingWithCopyWarning).
    sub_data = data.copy().reset_index(drop=True)

    # Resolve `colorby` into the column that drives the grouping plus a default legend title.
    if colorby == 'pitch_type':
        color_label = 'pitch_type'
        default_legend_title = 'Pitch Type'
    elif colorby == 'description':
        sub_data['desc'] = sub_data['description'].str.replace('_', ' ').str.title()
        color_label = 'desc'
        default_legend_title = 'Pitch Description'
    elif colorby == 'pitcher':
        color_label = 'player_name'
        default_legend_title = 'Pitcher'
    elif colorby == 'events':
        # only things where something happened; .copy() so the column assignment
        # below is not performed on a slice of sub_data
        sub_data = sub_data[sub_data['events'].notna()].copy()
        sub_data['event'] = sub_data['events'].str.replace('_', ' ').str.title()
        color_label = 'event'
        default_legend_title = 'Outcome'
    else:
        color_label = colorby
        default_legend_title = colorby

    if not legend_title:
        legend_title = default_legend_title

    # these annotation columns hold numbers and are rendered without decimals
    numeric_annotation = annotation in ("release_speed", "effective_speed", "launch_speed")

    scatters = []
    # NaN never compares equal to itself, so a NaN category would select no rows
    # (and would raise KeyError in the pitch-name lookup); drop it up front.
    for color in sub_data[color_label].dropna().unique():
        color_sub_data = sub_data[sub_data[color_label] == color]

        if color_label == 'pitch_type':
            # fall back to the raw code for pitch types missing from the map
            marker_label = pitch_code_to_name_map.get(color, color)
        else:
            marker_label = color

        scatters.append(axis.scatter(
            color_sub_data['plate_x'],
            color_sub_data['plate_z'],
            s=10 ** 2,
            label=marker_label,
            alpha=alpha_markers
        ))

        # add an annotation at the center of the marker
        if annotation:
            rows = zip(color_sub_data['plate_x'], color_sub_data['plate_z'], color_sub_data[annotation])
            for plate_x, plate_z, raw_label in rows:
                label_formatted = "" if pd.isna(raw_label) else raw_label

                if numeric_annotation and label_formatted != "":
                    label_formatted = "{:.0f}".format(label_formatted)

                axis.annotate(label_formatted,
                              (plate_x, plate_z),
                              size=7,
                              ha='center',
                              va='center',
                              alpha=alpha_text)

    axis.set_xlim(-4, 4)
    axis.set_ylim(-1.5, 7)
    axis.axis('off')

    axis.legend(handles=scatters, title=legend_title, bbox_to_anchor=(0.7, 1), loc='upper left')

    # Title the Axes we actually drew on; plt.title() would target pyplot's *current*
    # axes, which is not necessarily the caller-supplied `axis`.
    axis.set_title(title)
    # Kept for backward compatibility: callers rely on the figure being displayed here.
    plt.show()

    return axis


def plot_bb_profile(df: pd.DataFrame, parameter: Optional[str] = "launch_angle") -> None:
"""Plots a given StatCast parameter split by bb_type

Expand Down
4 changes: 2 additions & 2 deletions pybaseball/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ def get_first_season(team: str, include_equivalents: bool = True) -> Optional[in
2020: (date(2020, 7, 23), date(2020, 10, 27))
}

# NOTE(review): the next two assignments are rebound immediately below, so only the
# longer lists take effect — presumably these are the pre-change lines of a diff; confirm.
pitch_codes = ["FF", "SIFT", "CH", "CUKC", "FC", "SL", "FS", "ALL"] # note: all doesn't work in words, we'll have some special handling
pitch_names = ["4-Seamer", "Sinker", "Changeup", "Curveball", "Cutter", "Slider", "Sinker"]
# Pitch codes and their display names; the two lists appear to be paired by position.
# pitch_codes has one extra trailing entry ("ALL") with no counterpart in pitch_names.
pitch_codes = ["FF", "CU", "CH", "FC", "EP", "FO", "KN", "KC", "SC", "SI", "SL", "FS", "FT", "ST", "SV", "SIFT", "CUKC", "ALL"] # note: all doesn't work in words, we'll have some special handling
pitch_names = ["4-Seamer", "Curveball", "Changeup", "Cutter", "Eephus", "Forkball", "Knuckleball", "Knuckle-curve", "Screwball", "Sinker", "Slider", "Splitter", "2-Seamer", "Sweeper", "Slurve", "Sinker", "Curveball"]
# Upper-cased copy of every display name in pitch_names.
pitch_names_upper = [p.upper() for p in pitch_names]

# including all the codes to themselves makes this simpler later
Expand Down