Skip to content

Instantly share code, notes, and snippets.

from bs4 import BeautifulSoup
from pybaseball.league_batting_stats import get_table, session
START_DT = "2021-05-02"
def fetch_date(dt):
url = (
"http://www.baseball-reference.com/leagues/daily.cgi?"
"user_team=&bust_cache=&type=b&lastndays=7&dates=fromandto&"
f"fromandto={START_DT}.{dt}&level=mlb&franch="
@bdilday
bdilday / grstand1.R
Created August 24, 2020 11:51
Graphical MLB standings in R
library(dplyr)
library(ggplot2)
library(ggrepel)
library(rvest)
library(stringr)
fg_current_standings <- function() {
url = "https://www.fangraphs.com/depthcharts.aspx?position=Standings"
@bdilday
bdilday / ncaa_mbb_current_results_2019.csv
Last active March 30, 2019 12:10
NCAA March Madness results 2019
game result
2019_1242_1318 1
2019_1266_1293 0
2019_1233_1314 0
2019_1330_1345 0
2019_1199_1436 1
2019_1276_1285 1
2019_1124_1393 1
2019_1279_1328 0
2019_1125_1396 1
@bdilday
bdilday / war_data.csv
Last active September 2, 2018 16:14
war_data.csv
We can't make this file beautiful and searchable because it's too large.
"playerID","yearID","WAA","WAR","WAR_def","WAR_off","POS"
"aaronha01",1954,-0.61,1.27,-0.71,1.98,"OF"
"aaronha01",1955,3.77,6.25,-0.01,6.26,"OF"
"aaronha01",1956,4.63,7.18,0.65,6.53,"OF"
"aaronha01",1957,5.5,7.97,-0.13,8.1,"OF"
"aaronha01",1958,4.84,7.32,0.21,7.11,"OF"
"aaronha01",1959,6.13,8.64,-1.08,9.72,"OF"
"aaronha01",1960,5.47,7.98,0.77,7.21,"OF"
"aaronha01",1961,6.86,9.4,2.02,7.38,"OF"
@bdilday
bdilday / catcher_defense_data.csv
Last active September 1, 2018 13:30
catcher_defense_data.csv
We can't make this file beautiful and searchable because it's too large.
"game_id","year_id","bat_home_id","off_score","def_pitcher","def_catcher","off_team","def_team","park_id","park"
"WS2196109210",1961,0,3,"danib102_1961","retzk101_1961","MIN_1961","WS2_1961","WS2_1961","WS2"
"WS2196109190",1961,0,2,"mcclj104_1961","retzk101_1961","MIN_1961","WS2_1961","WS2_1961","WS2"
"WS2196109150",1961,0,1,"danib102_1961","retzk101_1961","KC1_1961","WS2_1961","WS2_1961","WS2"
"WS2196109130",1961,0,1,"hobae101_1961","retzk101_1961","BOS_1961","WS2_1961","WS2_1961","WS2"
"WS2196109120",1961,0,0,"burnp102_1961","retzk101_1961","BOS_1961","WS2_1961","WS2_1961","WS2"
"WS2196109032",1961,0,2,"gablg102_1961","dalep101_1961","CHA_1961","WS2_1961","WS2_1961","WS2"
"WS2196109170",1961,0,2,"maesh101_1961","retzk101_1961","KC1_1961","WS2_1961","WS2_1961","WS2"
"WS2196109160",1961,0,0,"burnp102_1961","retzk101_1961","KC1_1961","WS2_1961","WS2_1961","WS2"
@bdilday
bdilday / mlb_stadia_paths.csv
Created June 5, 2018 12:24
MLB stadium paths
team x y segment
angels 26.27 104.81 outfield_outer
angels 25.932060362453754 98.80779459065957 outfield_outer
angels 26.551757956401122 92.8655601538784 outfield_outer
angels 29.069970572128522 87.4573503249933 outfield_outer
angels 32.986155173212474 82.91330959433427 outfield_outer
angels 37.08065184258191 78.5115488314632 outfield_outer
angels 41.202800272760506 74.13565416464868 outfield_outer
angels 45.356441744254404 69.7896729842234 outfield_outer
angels 49.49117542835149 65.42575404594743 outfield_outer
library(baseballr)
library(dplyr)
obtain_data = function(type="postgres",
start_date="2017-03-29",
end_date="2017-10-03", infile=NULL) {
if (type=="postgres") {
# postgres db connection here
@bdilday
bdilday / scrape_statcast_expected_stats.R
Last active May 17, 2018 01:27
Scrape Statcast expected stats
library(dplyr)
library(ggplot2)
library(rvest)
library(jsonlite)
scrape_statcast_expected_stats = function(year=2018, min_pa=25) {
url = sprintf("https://baseballsavant.mlb.com/expected_statistics?type=batter&year=%s&position=&team=&min=%d", year, min_pa)
h = xml2::read_html(url)
s = html_nodes(h, "script")[[10]]
@bdilday
bdilday / top10warfranch.R
Last active April 15, 2018 13:55
R graph - top 10 WAR by franchise
library(readr)
library(ggplot2)
library(dplyr)
library(Lahman)
# Bar chart of the ten highest career-WAR players for each franchise, one
# facet per franchise.

# Batting WAR table, downloaded from Baseball-Reference on every run.
br_war <- read_csv(
  "https://www.baseball-reference.com/data/war_daily_bat.txt"
)

# Lahman team table; used below to attach `name` and `franchID` to each
# (team_ID, year_ID) pair in the WAR table.
m <- Lahman::Teams

# Summed WAR per (franchID, name, player_ID). WAR is coerced with
# as.numeric() and NA values are dropped from the sum.
ndf <- br_war %>%
  merge(
    m %>% select(teamIDBR, yearID, name, franchID),
    by.x = c("team_ID", "year_ID"),
    by.y = c("teamIDBR", "yearID")
  ) %>%
  group_by(franchID, name, player_ID) %>%
  summarise(w = sum(as.numeric(WAR), na.rm = TRUE))

# One row per franchise. `m` ends up as the largest number of player rows
# found under any single team name of that franchise.
# NOTE(review): the row numbering relies on summarise() dropping only the
# last grouping level, so `ndf` is still grouped by (franchID, name) and the
# first summarise() below leaves the data grouped by franchID -- confirm
# against the installed dplyr version.
xx <- ndf %>%
  arrange(franchID, desc(w)) %>%
  mutate(i = row_number()) %>%
  group_by(franchID, name) %>%
  summarise(m = max(i)) %>%
  arrange(franchID, desc(m)) %>%
  mutate(ir = row_number()) %>%
  group_by(franchID) %>%
  # m2 must be computed before m is overwritten; only m2 survives the select.
  mutate(m2 = max(m), m = sum(m)) %>%
  filter(ir == 1) %>%
  select(franchID, m = m2)

# Rank players by WAR within each franchise, keep the first ten rows of
# franchises whose `m` is at least 110, and draw one bar per rank.
p <- ndf %>%
  merge(xx, by = "franchID") %>%
  group_by(franchID) %>%
  arrange(desc(w)) %>%
  mutate(i = row_number()) %>%
  filter(i <= 10, m >= 110) %>%
  ggplot(aes(x = i, y = w)) +
  geom_bar(stat = "identity") +
  facet_wrap(~franchID) +
  theme_minimal() +
  labs(x = "franchise rank", y = "career WAR")
@bdilday
bdilday / trajectory_calculator1.R
Last active February 20, 2018 03:54
trajectory calculator
trajectory_pars <- list(
# constants
mass = 5.125, # oz,
circumference = 9.125, # in
beta = 1.217e-4, # 1 / meter
cd0 = 0.3008,
cdspin = 0.0292,
cl0 = 0.583,
cl1 = 2.333,