Skip to content

Instantly share code, notes, and snippets.

@steveharoz
Last active April 3, 2024 16:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save steveharoz/a8955fd91a8c9b7822dc3990c03b3283 to your computer and use it in GitHub Desktop.
Save steveharoz/a8955fd91a8c9b7822dc3990c03b3283 to your computer and use it in GitHub Desktop.
Replication rate by effect size
library(tidyverse)
# simulate one experiment
#
# Draws two equally sized groups from unit-variance normal distributions:
# group `a` with mean 0 and group `b` with mean `effect_size`.
#
# Arguments:
#   subject_count  total number of subjects; each group gets subject_count / 2
#   effect_size    true mean of group `b` (group `a` always has mean 0)
#
# Returns a one-row tibble with columns:
#   p  one-sided p-value from t.test() for the alternative mean(a) < mean(b)
#   d  observed Cohen's d: (mean(b) - mean(a)) / pooled SD
simulate = function(subject_count = 20, effect_size = 0) {
  group_size = subject_count / 2
  a = rnorm(group_size)
  b = rnorm(group_size, effect_size)

  # Pooled SD uses n1 + n2 - 2 degrees of freedom (one lost per estimated
  # group mean). Taking sd() of the concatenated centered samples would
  # divide by n1 + n2 - 1 and slightly underestimate d.
  within_ss = sum((a - mean(a))^2) + sum((b - mean(b))^2)
  pooled_sd = sqrt(within_ss / (length(a) + length(b) - 2))

  tibble(
    p = t.test(a, b, alternative = "less")$p.value,
    d = (mean(b) - mean(a)) / pooled_sd
  )
}
# subject count
# The replication experiment uses 2.5 times as many subjects as the original.
N_original = 20
N_replication = N_original * 2.5

# For every true effect size from 0 to 1 (step 0.1), run 2000 independent
# pairs of experiments: one original and one replication.
data = expand_grid(
  effect_size = seq(0, 1, 0.1),
  index = 1:2000 # simulations per effect size
) %>%
  # rowwise() so simulate() is called once per row with that row's effect size
  rowwise() %>%
  mutate(original = simulate(N_original, effect_size)) %>%
  mutate(replication = simulate(N_replication, effect_size)) %>%
  ungroup() %>%
  # get the p-value and cohens d from original and replication
  # (yields columns original_p, original_d, replication_p, replication_d)
  unnest(original, names_sep = "_") %>%
  unnest(replication, names_sep = "_")

# done!
# The notification sound is optional: skip it when beepr is not installed
# rather than erroring at the very end of a long simulation.
if (requireNamespace("beepr", quietly = TRUE)) {
  beepr::beep(2)
}
# check if replicate
# Flag each simulated pair:
#   significant_*         p < 0.05 in that experiment
#   has_replicated        original and replication agree on significance
#   replicated_and_wrong  they agree, but the shared verdict contradicts the
#                         truth (significant with no effect, or
#                         non-significant with a real effect)
data = mutate(
  data,
  significant_original = original_p < 0.05,
  significant_replication = replication_p < 0.05,
  has_replicated = significant_original == significant_replication,
  # sign(effect_size) is 0 for a null effect and 1 for a positive one, so it
  # can be compared against the 0/1 coding of the significance flag.
  replicated_and_wrong = has_replicated &
    (sign(effect_size) != as.numeric(significant_original))
)
# Plot 1: replication rate as a function of the effect size observed in the
# original experiment, one binomial-GLM curve per true effect size.
ggplot(data) +
  aes(
    x = original_d,
    y = as.numeric(has_replicated),
    color = factor(effect_size),
    fill = factor(effect_size)
  ) +
  geom_smooth(
    linewidth = 1,
    fullrange = TRUE,
    method = glm,
    method.args = list(family = "binomial")
  ) +
  # NOTE: limits set on the scale (as opposed to coord_cartesian) drop the
  # simulations whose observed d lies outside [0, 1] before the GLM is fit.
  scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::label_percent()) +
  theme_classic(12) +
  labs(
    x = "Cohen's d measured from original experiment",
    y = NULL,
    color = "Actual effect size", fill = "Actual effect size",
    title = "Replication rate by effect size",
    subtitle = "Replication: p-values are both <0.05 or both >0.05",
    # Built from the sample-size variables so the caption cannot go stale.
    caption = paste0(
      "Original N = ", N_original, ". Replication N = ", N_replication, "."
    )
  )
# Plot 2: rate at which original and replication agree with each other yet
# both point to the wrong conclusion, as a function of the effect size
# observed in the original experiment (one binomial-GLM curve per true
# effect size).
ggplot(data) +
  aes(
    x = original_d,
    y = as.numeric(replicated_and_wrong),
    color = factor(effect_size),
    fill = factor(effect_size)
  ) +
  geom_smooth(
    linewidth = 1,
    method = glm,
    method.args = list(family = "binomial")
  ) +
  # NOTE: limits set on the scale (as opposed to coord_cartesian) drop the
  # simulations whose observed d lies outside [0, 1] before the GLM is fit.
  scale_x_continuous(limits = c(0, 1), expand = c(0, 0)) +
  scale_y_continuous(labels = scales::label_percent()) +
  # Breaks are listed with the null effect (0) last.
  scale_color_hue(
    aesthetics = c("color", "fill"),
    breaks = c(seq(.1, 1, .1), 0)
  ) +
  theme_classic(12) +
  labs(
    x = "Cohen's d measured from original experiment",
    y = NULL,
    color = "Actual effect size", fill = "Actual effect size",
    title = "Rate that both original and replication results mislead",
    subtitle = "\"Mislead\" means p<0.05 for effect size 0,\n and p>0.05 for effect size > 0",
    # Built from the sample-size variables so the caption cannot go stale.
    caption = paste0(
      "Original N = ", N_original, ". Replication N = ", N_replication, "."
    )
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment