Skip to content

Instantly share code, notes, and snippets.

@madams1
Last active January 7, 2021 13:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save madams1/2903c01d8231ee58c53557eb7c239cba to your computer and use it in GitHub Desktop.
Save madams1/2903c01d8231ee58c53557eb7c239cba to your computer and use it in GitHub Desktop.
library(ggplot2)
library(dplyr)
lotr <- readr::read_tsv("https://raw.githubusercontent.com/jennybc/lotr-tidy/master/data/lotr_clean.tsv") %>%
rename_all(tolower)
# bars ----------------------------------------------------------------------------------------
lotr_by_race <- lotr %>%
group_by(race) %>%
summarize(total_words = sum(words)) %>%
arrange(desc(total_words))
ggplot(lotr_by_race, aes(x = race, y = total_words)) +
geom_bar()
ggplot(lotr_by_race, aes(x = race, y = total_words)) +
geom_bar(stat = "identity")
ggplot(lotr_by_race, aes(x = reorder(race, total_words), y = total_words)) +
geom_bar(stat = "identity")
ggplot(lotr_by_race, aes(x = reorder(race, total_words), y = total_words)) +
geom_bar(stat = "identity") +
coord_flip()
ggplot(lotr_by_race, aes(x = reorder(race, total_words), y = total_words)) +
geom_bar(
stat = "identity",
fill = "steelblue"
) +
coord_flip() +
labs(
title = "LOTR Word Count by Race",
x = "Race",
y = "Word Count"
)
# lines ---------------------------------------------------------------------------------------
library(nbastatR)
convert_season_slug <- function(slug) {
as.integer(stringr::str_remove(slug, "-.*")) + 1
}
team_season_data <- teams_annual_stats() %>%
mutate(season = convert_season_slug(slugSeason))
league_avg_fg3_trend <- team_season_data %>%
filter(fg3a > 0) %>%
group_by(season) %>%
summarize(
fg3a_per_game = mean(fg3a/gp),
fg3_perc = mean(pctFG3, na.rm = TRUE)
) %>%
arrange(season) %>%
mutate(nameTeam = "League Average")
ggplot(league_avg_fg3_trend, aes(season, fg3a_per_game)) +
geom_line()
avg_fg3_comparison <- team_season_data %>%
filter(
nameTeam %in% c("Houston Rockets", "Golden State Warriors"),
fg3a > 0
) %>%
group_by(nameTeam, season) %>%
summarize(
fg3a_per_game = mean(fg3a/gp),
fg3_perc = mean(pctFG3)
) %>%
bind_rows(league_avg_fg3_trend)
# 3PT attempts
ggplot(avg_fg3_comparison, aes(
season,
fg3a_per_game,
lwd = nameTeam == "League Average",
lty = nameTeam == "League Average",
color = nameTeam
)) +
geom_line(alpha = 0.7) +
scale_size_manual(
values = c(0.8, 0.5),
guide = "none"
) +
scale_linetype(guide = "none") +
scale_color_manual(
breaks = c("Houston Rockets", "Golden State Warriors", "League Average"),
values = c(
"Golden State Warriors" = "dodgerblue3",
"Houston Rockets" = "firebrick3",
"League Average" = "grey50"
)
) +
labs(
title = "Average 3pt FGA per game",
x = "\nSeason",
y = "3pt FGA / game\n",
color = ""
) +
theme_minimal() +
theme(text = element_text(family = "Avenir"))
# 3PT %
ggplot(avg_fg3a_comparison, aes(
season,
fg3_perc,
lwd = nameTeam == "League Average",
lty = nameTeam == "League Average",
color = nameTeam
)) +
geom_line(alpha = 0.7) +
scale_size_manual(
values = c(0.8, 0.5),
guide = "none"
) +
scale_linetype(guide = "none") +
scale_y_continuous(labels = scales::percent) +
scale_color_manual(
breaks = c("Golden State Warriors", "Houston Rockets", "League Average"),
values = c(
"Golden State Warriors" = "dodgerblue3",
"Houston Rockets" = "firebrick3",
"League Average" = "grey50"
)
) +
labs(
title = "Average 3pt FG %",
x = "\nSeason",
y = "3pt FG %\n",
color = ""
) +
theme_minimal() +
theme(text = element_text(family = "Avenir"))
# points --------------------------------------------------------------------------------------
harden_shots_2019 <- teams_shots(
team_ids = 1610612745,
seasons = 2019
) %>%
filter(
idPlayer == 201935,
distanceShot < 40
) %>%
select(
locationX,
locationY,
distanceShot,
typeShot,
isShotMade
)
ggplot(harden_shots_2019, aes(locationX, locationY)) +
geom_point() +
coord_equal()
ggplot(harden_shots_2019, aes(
locationX,
-locationY,
fill = isShotMade,
color = isShotMade
)) +
geom_point(
shape = 21,
size = 2,
alpha = 0.1
) +
scale_color_discrete(guide = "none") +
scale_fill_manual(
values = c(
"FALSE" = "firebrick3",
"TRUE" = "dodgerblue3"
),
labels = c(
"FALSE" = "Missed",
"TRUE" = "Made"
)
) +
labs(
title = " James Harden Shot Chart (2018-19)\n",
color = "",
fill = ""
) +
coord_equal() +
theme_void() +
theme(legend.position="top")
# histograms/densities ------------------------------------------------------------------------
player_salaries_2019 <- hoopshype_salaries() %>%
filter(slugSeason == "2018-19") %>%
select(slugSeason, namePlayer, amountContract)
ggplot(player_salaries_2019, aes(amountContract)) +
geom_histogram() +
labs(
title = "Distribution of NBA player salary (2018-19)",
x = "Annual Salary",
y = "Number of Players"
)
ggplot(player_salaries_2019, aes(amountContract)) +
geom_histogram(
color = "white",
fill = "steelblue",
bins = 50
) +
labs(
title = "Distribution of NBA player salary (2018-19)",
x = "Annual Salary",
y = "Number of Players"
) +
scale_x_continuous(labels = scales::dollar_format()) +
theme_minimal()
ggplot(player_salaries_2019, aes(amountContract)) +
geom_density()
ggplot(player_salaries_2019, aes(amountContract)) +
geom_density(
color = "white",
fill = "steelblue",
alpha = 0.4
) +
labs(
title = "Distribution of NBA player salary (2018-19)",
x = "Annual Salary",
y = "Density"
) +
scale_x_continuous(labels = scales::dollar_format()) +
theme_minimal()
player_salaries_2019_2022 <- hoopshype_salaries() %>%
filter(slugSeason %in% c("2018-19", "2021-22"))
ggplot(player_salaries_2019_2022, aes(amountContract, fill = slugSeason)) +
geom_density(alpha = 0.3, color = "white") +
scale_fill_manual(values = c("2018-19" = "steelblue", "2021-22" = "firebrick")) +
scale_x_continuous(labels = scales::dollar_format()) +
labs(
title = "Distribution of NBA player salary (2018-19 vs. 2021-22)",
x = "Annual Salary",
y = "Density",
fill = "Season"
) +
theme_minimal()
# hexbins -------------------------------------------------------------------------------------
ggplot(harden_shots_2019, aes(locationX, -locationY)) +
geom_hex() +
coord_equal() +
theme(legend.position = "top")
ggplot(harden_shots_2019 %>% filter(typeShot == "3PT Field Goal"), aes(locationX, -locationY)) +
geom_hex(color = "white") +
scale_fill_viridis_c(breaks = c(2, 4, 6, 8, 10, 12)) +
# coord_equal() +
labs(
title = " James Harden 3PT Shot Chart (2018-19)\n",
fill = ""
) +
coord_equal() +
theme_void() +
theme(
text = element_text(family = "Avenir"),
legend.position = "bottom"
)
# faceting ------------------------------------------------------------------------------------
lotr_by_film_race <- lotr %>%
group_by(film, race) %>%
summarize(total_words = sum(words)) %>%
arrange(desc(total_words))
ggplot(lotr_by_film_race, aes(x = reorder(race, total_words), y = total_words)) +
geom_bar(
stat = "identity",
fill = "steelblue"
) +
coord_flip() +
labs(
title = "LOTR Word Count by Film and Race",
x = "Race",
y = "Word Count"
) +
facet_wrap(~film) +
theme_bw() +
theme(text = element_text(family = "Avenir"))
rockets_players <- c(
"James Harden",
"Chris Paul",
"Clint Capela",
"Eric Gordon"
)
rockets_shots_2019 <- teams_shots(
team_ids = 1610612745,
seasons = 2019
) %>%
filter(
distanceShot < 40,
namePlayer %in% rockets_players
) %>%
select(
namePlayer,
yearSeason,
locationX,
locationY,
distanceShot,
typeShot,
isShotMade
)
ggplot(rockets_shots_2019, aes(locationX, -locationY)) +
geom_hex(color = "white") +
scale_fill_viridis_c() +
coord_equal() +
labs(
title = " Rockets Player Shot Charts (2018-19)\n",
fill = ""
) +
facet_wrap(
~namePlayer,
"free"
) +
theme_void() +
theme(
text = element_text(family = "Avenir"),
legend.position = "bottom"
)
# functional plots ----------------------------------------------------------------------------
compare_3p_fga <- function(team_1, team_2) {
color_values <- c("dodgerblue3", "firebrick3", "grey50") %>%
setNames(c(team_1, team_2, "League Average"))
league_trend <- team_season_data %>%
filter(fg3a > 0) %>%
group_by(season) %>%
summarize(
fg3a_per_game = mean(fg3a/gp),
fg3_perc = mean(pctFG3, na.rm = TRUE)
) %>%
arrange(season) %>%
mutate(nameTeam = "League Average")
fg3_comparison <- team_season_data %>%
filter(
nameTeam %in% c(team_1, team_2),
fg3a > 0
) %>%
group_by(nameTeam, season) %>%
summarize(
fg3a_per_game = mean(fg3a/gp),
fg3_perc = mean(pctFG3)
) %>%
bind_rows(league_trend)
# 3PT attempts
ggplot(fg3_comparison, aes(
season,
fg3a_per_game,
lwd = nameTeam == "League Average",
lty = nameTeam == "League Average",
color = nameTeam
)) +
geom_line(alpha = 0.7) +
scale_size_manual(
values = c(0.8, 0.5),
guide = "none"
) +
scale_linetype(guide = "none") +
scale_color_manual(
breaks = c(team_1, team_2, "League Average"),
values = color_values
) +
labs(
title = "Average 3pt FGA per game",
x = "\nSeason",
y = "3pt FGA / game\n",
color = ""
) +
theme_minimal() +
theme(text = element_text(family = "Avenir"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment