Skip to content

Commit 28477b0

Browse files
authored
Rank ordered logit implementation (#7)
* Add basic Plackett-Luce model + validation * add new ROL models * Comparing beta and rank models * Fix title of team rank img * rerun comparison with 8000 posterior samples * fix variance image * add ar model * update model comparison code * start updating main folder * Move to rank model in main folder - implemented weather and circuit models - moved to stan for everything - performed first version of LOO model comparison * First version of pp check for ROL model * Update 05_check.R * Reparameterize for better sampling performance * update pp check to %>% pipe * update model structure comparison to ncp * update model comparison code + images * Moved all scripts to new rank-ordered logit model * Update readme + model_comparison readme
1 parent eecac30 commit 28477b0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+2259
-438
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,12 @@ vignettes/*.pdf
3434

3535
# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
3636
rsconnect/
37+
38+
# Stan executables
39+
stan_models/*.exe
40+
model_comparison/stan_models/*.exe
41+
model_comparison/stan_models/old/*.exe
42+
43+
# stan fits
44+
model_comparison/fits/*.rds
45+

01_prep_data.R

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Code accompanying the manuscript "Bayesian Analysis of Formula One Race Results"
2-
# Last edited 2022-02-17 by @vankesteren
2+
# Last edited 2022-12-11 by @vankesteren
33
# Contents: data preparation, data joining from database f1db_csv.
44
library(tidyverse)
55
library(lubridate)
@@ -115,7 +115,7 @@ results_dat <-
115115
left_join(tab_status, by = "statusId") %>%
116116
select(raceId, positionText, positionOrder, fastestLapTime, driverRef, constructorRef, status)
117117

118-
# Joining & cleaning ----
118+
# Joining, cleaning & saving ----
119119
f1_dat <-
120120
race_dat %>%
121121
left_join(results_dat, by = "raceId") %>%
@@ -125,4 +125,30 @@ f1_dat <-
125125
select(driver, constructor, year, round, circuit, position, weather_type, circuit_type, status) %>%
126126
mutate(year = as.integer(year), round = as.integer(round), position = as.integer(position))
127127

128+
# convert to factors
129+
f1_dat <-
130+
f1_dat %>%
131+
mutate(
132+
status = as_factor(status),
133+
constructor = as_factor(constructor),
134+
driver = as_factor(driver),
135+
weather_type = as_factor(weather_type),
136+
circuit_type = as_factor(circuit_type)
137+
)
138+
139+
# Adding a finished indicator
140+
compute_classified <- function(status) {
141+
out <- rep(FALSE, length(status))
142+
# anyone above the last person still running (finished or +n laps is classified)
143+
last_classified <- max(which(status == "Finished" | str_starts(status, "\\+")))
144+
out[1:last_classified] <- TRUE
145+
out
146+
}
147+
148+
f1_dat <-
149+
f1_dat %>%
150+
group_by(year, round) %>%
151+
mutate(finished = compute_classified(status)) %>%
152+
ungroup()
153+
128154
write_rds(f1_dat, "dat/f1_dat.rds")

02_eda.R

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Code accompanying the manuscript "Bayesian Analysis of Formula One Race Results"
2+
# Last edited 2022-12-11 by @vankesteren
3+
# Contents: status filtering, some EDA
4+
library(tidyverse)
5+
library(firatheme)
6+
7+
# Data loading ----
8+
f1_dat <- read_rds("dat/f1_dat.rds")
9+
f1_dat_finished <- f1_dat %>% filter(finished)
10+
11+
# Some EDA ----
12+
# finish position
13+
f1_dat_finished %>%
14+
ggplot(aes(x = factor(position))) +
15+
geom_bar(fill = firaCols[4]) +
16+
theme_fira() +
17+
labs(
18+
title = "Distribution of finish positions",
19+
subtitle = "F1 hybrid era (2014-2021)",
20+
x = "Finish position",
21+
y = "Count"
22+
)
23+
24+
ggsave("img/eda_finish_position.png", width = 9, height = 6, bg = "white")
25+
26+
# basic plot
27+
f1_dat_finished %>%
28+
filter(driver %in% c("hamilton", "raikkonen", "giovinazzi"), year > 2015) %>%
29+
ggplot(aes(x = factor(position), fill = driver)) +
30+
geom_bar(position = position_dodge(preserve = "single")) +
31+
theme_fira() +
32+
scale_fill_fira() +
33+
labs(
34+
x = "Finish position",
35+
y = "Count",
36+
title = "Different drivers' finish positions",
37+
subtitle = "Conditional on finishing the race",
38+
fill = ""
39+
) +
40+
theme(legend.position = "top") +
41+
facet_wrap(~year)
42+
43+
ggsave("img/eda_finish_drivers.png", width = 9, height = 6, bg = "white")
44+
45+
# average finish positions for 2021 season
46+
f1_dat_finished %>%
47+
filter(year == 2021) %>%
48+
group_by(driver) %>%
49+
summarize(mean_position = mean(position, na.rm = TRUE), sem = sd(position, na.rm = TRUE) / sqrt(n())) %>%
50+
mutate(driver = fct_reorder(driver, -mean_position)) %>%
51+
ggplot(aes(y = driver,
52+
x = mean_position,
53+
xmin = mean_position - 2*sem,
54+
xmax = mean_position + 2*sem)) +
55+
geom_pointrange(size = .4) +
56+
theme_fira() +
57+
labs(
58+
y = "",
59+
x = "Position (mean ± 2⋅se)",
60+
title = "2021 Season Finish Positions",
61+
subtitle = "Conditional on finishing the race"
62+
)
63+
64+
ggsave("img/eda_finish_2021.png", width = 9, height = 6, bg = "white")

02_process_data.R

Lines changed: 0 additions & 91 deletions
This file was deleted.

03_model.R

Lines changed: 33 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -2,71 +2,44 @@
22
# Last edited 2021-05-16 by @vankesteren
33
# Contents: Creating and estimating models
44
library(tidyverse)
5-
library(brms)
6-
7-
# read data
8-
f1_dat_finished <- read_rds("dat/f1_dat_finished.rds")
9-
10-
# basic model
11-
fit_basic <- brm(
12-
formula = prop_trans ~ 0 + (1 | driver) + (1 | driver:year) + (1 | constructor) + (1 | constructor:year),
13-
family = Beta(),
14-
data = f1_dat_finished,
15-
backend = "cmdstanr",
16-
chains = 4,
17-
cores = 4,
18-
threads = 3,
19-
warmup = 1000,
20-
iter = 3500
5+
library(cmdstanr)
6+
7+
# read & prepare stan data
8+
f1_dat <-
9+
read_rds("dat/f1_dat.rds") %>%
10+
filter(finished)
11+
12+
stan_data <- list(
13+
num_obs = f1_dat %>% nrow(),
14+
num_drivers = f1_dat %>% pull(driver) %>% nlevels(),
15+
num_teams = f1_dat %>% pull(constructor) %>% nlevels(),
16+
num_races = f1_dat %>% group_by(year, round) %>% n_groups(),
17+
num_seasons = f1_dat %>% group_by(year) %>% n_groups(),
18+
ranked_driver_ids = f1_dat %>% arrange(year, round, position) %>% pull(driver) %>% as.integer(),
19+
ranked_team_ids = f1_dat %>% arrange(year, round, position) %>% pull(constructor) %>% as.integer(),
20+
num_entrants = f1_dat %>% group_by(year, round) %>% summarize(count = n()) %>% pull(count),
21+
season_id = f1_dat %>% group_by(year, round) %>% summarize(y = factor(first(year))) %>% pull(y) %>% as.integer(),
22+
wet_weather = f1_dat %>% group_by(year, round) %>% summarize(w = first(weather_type)) %>% pull(w) %>% as.integer() - 1L,
23+
prm_circuit = f1_dat %>% group_by(year, round) %>% summarize(c = first(circuit_type)) %>% pull(c) %>% as.integer() - 1L
2124
)
2225

23-
summary(fit_basic)
24-
write_rds(fit_basic, "fit/fit_basic.rds")
26+
# basic model
27+
mod_basic <- cmdstan_model("stan_models/basic_model.stan")
28+
fit_basic <- mod_basic$sample(stan_data, chains = 8, parallel_chains = 8, iter_sampling = 1000)
29+
fit_basic$save_object("fit/basic.rds")
2530

2631
# weather model
27-
fit_weather <- brm(
28-
formula = prop_trans ~ 0 + (1 + weather_type | driver) + (1 | driver:year) + (1 | constructor) + (1 | constructor:year),
29-
family = Beta(),
30-
data = f1_dat_finished,
31-
backend = "cmdstanr",
32-
chains = 4,
33-
cores = 4,
34-
threads = 3,
35-
warmup = 1000,
36-
iter = 3500
37-
)
38-
39-
summary(fit_weather)
40-
write_rds(fit_weather, "fit/fit_weather.rds")
32+
mod_weather <- cmdstan_model("stan_models/weather_model.stan")
33+
fit_weather <- mod_weather$sample(stan_data, chains = 8, parallel_chains = 8, iter_sampling = 1000)
34+
fit_weather$save_object("fit/weather.rds")
4135

4236
# circuit type model
43-
fit_circuit <- brm(
44-
formula = prop_trans ~ 0 + (1 | driver) + (1 | driver:year) + (1 + circuit_type | constructor) + (1 | constructor:year),
45-
family = Beta(),
46-
data = f1_dat_finished,
47-
backend = "cmdstanr",
48-
chains = 4,
49-
cores = 4,
50-
threads = 3,
51-
warmup = 1000,
52-
iter = 3500
53-
)
54-
55-
summary(fit_circuit)
56-
write_rds(fit_circuit, "fit/fit_circuit.rds")
37+
mod_circuit <- cmdstan_model("stan_models/circuit_model.stan")
38+
fit_circuit <- mod_circuit$sample(stan_data, chains = 8, parallel_chains = 8, iter_sampling = 1000)
39+
fit_circuit$save_object("fit/circuit.rds")
5740

5841
# weather + circuit type model
59-
fit_weather_circuit <- brm(
60-
formula = prop_trans ~ 0 + (1 + weather_type | driver) + (1 | driver:year) + (1 + circuit_type | constructor) + (1 | constructor:year),
61-
family = Beta(),
62-
data = f1_dat_finished,
63-
backend = "cmdstanr",
64-
chains = 4,
65-
cores = 4,
66-
threads = 3,
67-
warmup = 1000,
68-
iter = 3500
69-
)
70-
71-
summary(fit_weather_circuit)
72-
write_rds(fit_weather_circuit, "fit/fit_weather_circuit.rds")
42+
# weather model
43+
mod_all <- cmdstan_model("stan_models/weather_circuit_model.stan")
44+
fit_all <- mod_all$sample(stan_data, chains = 8, parallel_chains = 8, iter_sampling = 1000)
45+
fit_all$save_object("fit/weather_circuit.rds")

04_compare.R

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,31 @@
11
# Code accompanying the manuscript "Bayesian Analysis of Formula One Race Results"
2-
# Last edited 2021-05-16 by @vankesteren
2+
# Last edited 2021-12-12 by @vankesteren
33
# Contents: Performing model comparison
44
library(tidyverse)
5-
library(brms)
5+
library(cmdstanr)
6+
library(loo)
67
library(xtable)
78

8-
options(mc.cores = 12)
99

1010
# which model is best? Compare using LOO
11-
fit_basic <- read_rds("fit/fit_basic.rds") %>% add_criterion("loo")
12-
fit_circuit <- read_rds("fit/fit_circuit.rds") %>% add_criterion("loo")
13-
fit_weather <- read_rds("fit/fit_weather.rds") %>% add_criterion("loo")
14-
fit_all <- read_rds("fit/fit_weather_circuit.rds") %>% add_criterion("loo")
11+
fit_basic <- read_rds("fit/basic.rds")
12+
fit_weather <- read_rds("fit/weather.rds")
13+
fit_circuit <- read_rds("fit/circuit.rds")
14+
fit_all <- read_rds("fit/weather_circuit.rds")
15+
16+
loo_basic <- fit_basic$loo(cores = 11)
17+
loo_weather <- fit_weather$loo(cores = 11)
18+
loo_circuit <- fit_circuit$loo(cores = 11)
19+
loo_all <- fit_all$loo(cores = 11)
20+
1521

1622
loo_results <- loo_compare(
17-
fit_basic,
18-
fit_circuit,
19-
fit_weather,
20-
fit_all,
21-
model_names = c("Basic", "Circuit", "Weather", "Circuit + Weather")
23+
list(
24+
"Basic" = loo_basic,
25+
"Weather" = loo_weather,
26+
"Circuit" = loo_circuit,
27+
"Circuit + Weather" = loo_all
28+
)
2229
)
2330

2431
loo_results
@@ -29,12 +36,10 @@ write_rds(loo_results, "fit/loo_results.rds")
2936
xtable::xtable(loo_results)
3037

3138
# elpd_diff se_diff
32-
# Basic 0.0 0.0
33-
# Circuit -1.0 0.5
34-
# Weather -1.2 1.3
35-
# Circuit + Weather -1.9 1.4
39+
# Circuit 0.0 0.0
40+
# Basic -2.3 6.1
41+
# Circuit + Weather -2.7 2.2
42+
# Weather -4.7 6.3
3643

37-
# basic works best
44+
# Circuit works best, but not that different from basic
3845

39-
# comparing basic to next best model
40-
bayes_factor(fit_basic, fit_circuit)

0 commit comments

Comments
 (0)