Skip to content

Instantly share code, notes, and snippets.

@steveharoz
Last active April 26, 2024 09:50
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save steveharoz/452929e367fc8717e9e742d81ab5e195 to your computer and use it in GitHub Desktop.
Save steveharoz/452929e367fc8717e9e742d81ab5e195 to your computer and use it in GitHub Desktop.
Uncanny Mountain simulations
---
title: "How likely is the uncanny mountain?"
author: "by Steve Haroz"
output:
github_document:
params:
use_cache: TRUE
---
```{r setup, warning = FALSE, message = FALSE}
library(tidyverse)
library(patchwork)
library(multidplyr)
```
```{r parameters}
# significant level
P_HI = 0.05
P_LO = 0.01
```
```{r initialize}
simulations = expand.grid(
effect_size = seq(0, 1, 0.05),
N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2
rep = 1:10000
)
```
```{r parallel}
core_count = parallel::detectCores() - 1
cluster = new_cluster(core_count)
```
```{r simulate, cache=TRUE}
start = Sys.time()
simulations = simulations %>%
mutate(row = 1:n()) %>% # rowwise workaround
partition(cluster) %>%
group_by(row) %>% # rowwise workaround
mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>%
ungroup() %>%
collect()
Sys.time() - start
```
```{r summarize}
simulations = simulations %>%
mutate(significant = p < P_HI) %>%
mutate(uncanny = (p > P_LO) & (p < P_HI))
simulations_summary = simulations %>%
summarise(
.by = c(effect_size, N),
uncanny = mean(uncanny),
significant = mean(significant),
p_lo = mean(p < P_LO)
)
```
```{r line_graphs}
ggplot(simulations_summary) +
aes(x = N*2, y = uncanny, color = factor(effect_size)) +
geom_line(linewidth = 1) +
theme_minimal(12) +
labs(title = "", color = "Cohen's D")
ggplot(simulations_summary) +
aes(x = effect_size, y = uncanny, color = factor(N*2)) +
geom_line(linewidth = 1) +
theme_minimal(12) +
labs(title = "", x = "Cohen's D", color = "N")
```
```{r heatmap}
pretty_percent = function(x) {
ifelse (x < 0.005, "<1%", scales::label_percent(1)(x))
}
# labels
Ns = simulations$N %>% unique() %>% sort()
Ns = Ns * 2
Ns_labels = as.character(Ns)
Ns_labels[seq(2, length(Ns_labels)-3, 2)] = ""
Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k"))
ESs = simulations$effect_size %>% unique() %>% sort()
ESs_labels = as.character(ESs)
ESs_labels[seq(2, length(ESs)-1, 2)] = ""
# graph
plot_simulation = ggplot(simulations_summary) +
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) +
geom_raster() +
geom_text(aes(label = pretty_percent(uncanny))) +
scale_fill_gradientn(
limits = c(0,.25),
na.value = "#9E0142",
labels = scales::label_percent(),
guide = "none",
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
scale_x_discrete(labels = Ns_labels) +
scale_y_discrete(labels = ESs_labels) +
theme_minimal(12) +
labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"',
subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?",
x = "N", y = "Effect size (Cohen's D)")
plot_simulation
```
----------
```{r multiple}
multiple = function(total_ps, uncanny_ps, probability_of_uncanny) {
choose(total_ps, uncanny_ps) *
probability_of_uncanny^uncanny_ps *
(1-probability_of_uncanny)^(total_ps-uncanny_ps)
}
simulations_summary = simulations_summary %>%
rowwise() %>%
mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
ungroup()
plot_simulation3_5 = ggplot(simulations_summary) +
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
geom_raster() +
geom_text(aes(label = pretty_percent(uncanny_multiple))) +
scale_fill_gradientn(
limits = c(0,.25),
na.value = "#9E0142",
labels = scales::label_percent(),
guide = "none",
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
scale_x_discrete(labels = Ns_labels) +
scale_y_discrete(labels = ESs_labels) +
theme_minimal(12) +
labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results',
subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?",
x = "N", y = "Effect size (Cohen's D)")
plot_simulation3_5
```
```{r}
simulations_summary = simulations_summary %>%
rowwise() %>%
mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
ungroup()
plot_simulation4_5 = ggplot(simulations_summary) +
aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
geom_raster() +
geom_text(aes(label = pretty_percent(uncanny_multiple))) +
scale_fill_gradientn(
limits = c(0,.25),
na.value = "#9E0142",
labels = scales::label_percent(),
guide = "none",
colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
scale_x_discrete(labels = Ns_labels) +
scale_y_discrete(labels = ESs_labels) +
theme_minimal(12) +
labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results',
subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?",
x = "N", y = "Effect size (Cohen's D)")
plot_simulation4_5
```
```{r}
plot_simulation / plot_simulation3_5 / plot_simulation4_5
ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment