steveharoz · April 26, 2024 09:50
diff --git a/readme.MD b/readme.MD
diff --git a/uncanny.rmd b/uncanny.rmd
 ---
 title: "How likely is the uncanny mountain?"
 author: "by Steve Haroz"
 output: 
  github_document:
 params:
  use_cache: TRUE
 ---


 ```{r setup, warning = FALSE, message = FALSE}
 library(tidyverse)
 library(patchwork)
 library(multidplyr)
 ```

 ```{r parameters}
 # significant level
 P_HI = 0.05
 P_LO = 0.01
 ```

 ```{r initialize}
 simulations = expand.grid(
  effect_size = seq(0, 1, 0.05),
  N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2
  rep = 1:10000
 )
 ```

 ```{r parallel}
 core_count = parallel::detectCores() - 1
 cluster = new_cluster(core_count)
 ```


 ```{r simulate, cache=TRUE}
 start = Sys.time()
 simulations = simulations %>% 
  mutate(row = 1:n()) %>%  # rowwise workaround
  partition(cluster) %>% 
  group_by(row) %>%  # rowwise workaround
  mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>% 
  ungroup() %>% 
  collect()
 Sys.time() - start
 ```

 ```{r summarize}
 simulations = simulations %>% 
  mutate(significant = p < P_HI) %>% 
  mutate(uncanny = (p > P_LO) & (p < P_HI))

 simulations_summary = simulations %>% 
  summarise(
    .by = c(effect_size, N),
    uncanny = mean(uncanny),
    significant = mean(significant),
    p_lo = mean(p < P_LO)
  )
 ```


 ```{r line_graphs}
 ggplot(simulations_summary) +
  aes(x = N*2, y = uncanny, color = factor(effect_size)) +
  geom_line(linewidth = 1) +
  theme_minimal(12) +
  labs(title = "", color = "Cohen's D")

 ggplot(simulations_summary) +
  aes(x = effect_size, y = uncanny, color = factor(N*2)) +
  geom_line(linewidth = 1) +
  theme_minimal(12) +
  labs(title = "", x = "Cohen's D", color = "N")
 ```

 ```{r heatmap}
 pretty_percent = function(x) {
  ifelse (x < 0.005, "<1%", scales::label_percent(1)(x))
 }

 # labels
 Ns = simulations$N %>% unique() %>% sort()
 Ns = Ns * 2
 Ns_labels = as.character(Ns)
 Ns_labels[seq(2, length(Ns_labels)-3, 2)] = ""
 Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k"))

 ESs = simulations$effect_size %>% unique() %>% sort()
 ESs_labels = as.character(ESs)
 ESs_labels[seq(2, length(ESs)-1, 2)] = ""

 # graph
 plot_simulation = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142", 
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) + 
  labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"', 
       subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?",
       x = "N", y = "Effect size (Cohen's D)")
 plot_simulation
 ```


 ----------



 ```{r multiple}

 multiple = function(total_ps, uncanny_ps, probability_of_uncanny) {
  choose(total_ps, uncanny_ps) *
    probability_of_uncanny^uncanny_ps * 
    (1-probability_of_uncanny)^(total_ps-uncanny_ps)
 }

 simulations_summary = simulations_summary %>% 
  rowwise() %>% 
  mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>% 
  ungroup()


 plot_simulation3_5 = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny_multiple))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142", 
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) + 
  labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results', 
       subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?",
       x = "N", y = "Effect size (Cohen's D)")
 plot_simulation3_5
 ```
 ```{r}

 simulations_summary = simulations_summary %>% 
  rowwise() %>% 
  mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>% 
  ungroup()


 plot_simulation4_5 = ggplot(simulations_summary) +
  aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
  geom_raster() +
  geom_text(aes(label = pretty_percent(uncanny_multiple))) +
  scale_fill_gradientn(
    limits = c(0,.25),
    na.value = "#9E0142", 
    labels = scales::label_percent(),
    guide = "none",
    colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
  scale_x_discrete(labels = Ns_labels) +
  scale_y_discrete(labels = ESs_labels) +
  theme_minimal(12) + 
  labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results', 
       subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?",
       x = "N", y = "Effect size (Cohen's D)")
 plot_simulation4_5
 ```

 ```{r}
 plot_simulation / plot_simulation3_5 / plot_simulation4_5
 ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3)
 ```
	---
	title: "How likely is the uncanny mountain?"
	author: "by Steve Haroz"
	output:
	github_document:
	params:
	use_cache: TRUE
	---


	```{r setup, warning = FALSE, message = FALSE}
	library(tidyverse)
	library(patchwork)
	library(multidplyr)
	```

	```{r parameters}
	# significant level
	P_HI = 0.05
	P_LO = 0.01
	```

	```{r initialize}
	simulations = expand.grid(
	effect_size = seq(0, 1, 0.05),
	N = c(seq(20, 100, 5), seq(120, 200, 10), 250, 500, 1000, 5000, 10000), # actually N/2
	rep = 1:10000
	)
	```

	```{r parallel}
	core_count = parallel::detectCores() - 1
	cluster = new_cluster(core_count)
	```


	```{r simulate, cache=TRUE}
	start = Sys.time()
	simulations = simulations %>%
	mutate(row = 1:n()) %>% # rowwise workaround
	partition(cluster) %>%
	group_by(row) %>% # rowwise workaround
	mutate(p = t.test(rnorm(N), rnorm(N, effect_size))$p.value) %>%
	ungroup() %>%
	collect()
	Sys.time() - start
	```

	```{r summarize}
	simulations = simulations %>%
	mutate(significant = p < P_HI) %>%
	mutate(uncanny = (p > P_LO) & (p < P_HI))

	simulations_summary = simulations %>%
	summarise(
	.by = c(effect_size, N),
	uncanny = mean(uncanny),
	significant = mean(significant),
	p_lo = mean(p < P_LO)
	)
	```


	```{r line_graphs}
	ggplot(simulations_summary) +
	aes(x = N*2, y = uncanny, color = factor(effect_size)) +
	geom_line(linewidth = 1) +
	theme_minimal(12) +
	labs(title = "", color = "Cohen's D")

	ggplot(simulations_summary) +
	aes(x = effect_size, y = uncanny, color = factor(N*2)) +
	geom_line(linewidth = 1) +
	theme_minimal(12) +
	labs(title = "", x = "Cohen's D", color = "N")
	```

	```{r heatmap}
	pretty_percent = function(x) {
	ifelse (x < 0.005, "<1%", scales::label_percent(1)(x))
	}

	# labels
	Ns = simulations$N %>% unique() %>% sort()
	Ns = Ns * 2
	Ns_labels = as.character(Ns)
	Ns_labels[seq(2, length(Ns_labels)-3, 2)] = ""
	Ns_labels[Ns >= 1000] = as.character(paste0(Ns[Ns >= 1000]/1000, "k"))

	ESs = simulations$effect_size %>% unique() %>% sort()
	ESs_labels = as.character(ESs)
	ESs_labels[seq(2, length(ESs)-1, 2)] = ""

	# graph
	plot_simulation = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting a p-value in the "Uncanny Mountain"',
	subtitle = "What proportion of 10k between-subject simulations have 0.01 < p < 0.05?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation
	```


	----------



	```{r multiple}

	multiple = function(total_ps, uncanny_ps, probability_of_uncanny) {
	choose(total_ps, uncanny_ps) *
	probability_of_uncanny^uncanny_ps *
	(1-probability_of_uncanny)^(total_ps-uncanny_ps)
	}

	simulations_summary = simulations_summary %>%
	rowwise() %>%
	mutate(uncanny_multiple = sapply(3:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
	ungroup()


	plot_simulation3_5 = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny_multiple))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting at least 3 "Uncanny" p-values from 5 results',
	subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 3 p-values in (0.01 < p < 0.05)?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation3_5
	```
	```{r}

	simulations_summary = simulations_summary %>%
	rowwise() %>%
	mutate(uncanny_multiple = sapply(4:5, function(k) multiple(5, k, uncanny)) %>% sum()) %>%
	ungroup()


	plot_simulation4_5 = ggplot(simulations_summary) +
	aes(x = factor(N*2), y = factor(effect_size), fill = uncanny_multiple) +
	geom_raster() +
	geom_text(aes(label = pretty_percent(uncanny_multiple))) +
	scale_fill_gradientn(
	limits = c(0,.25),
	na.value = "#9E0142",
	labels = scales::label_percent(),
	guide = "none",
	colors = rev(RColorBrewer::brewer.pal(11, "Spectral"))) +
	scale_x_discrete(labels = Ns_labels) +
	scale_y_discrete(labels = ESs_labels) +
	theme_minimal(12) +
	labs(title = 'Probability of getting at least 4 "Uncanny" p-values from 5 results',
	subtitle = "What proportion of 10k between-subject simulations of 5 p-values have 4 p-values in (0.01 < p < 0.05)?",
	x = "N", y = "Effect size (Cohen's D)")
	plot_simulation4_5
	```

	```{r}
	plot_simulation / plot_simulation3_5 / plot_simulation4_5
	ggsave("~/statistics/multiple uncanny.png", width = 1000, height = 2000, units = "px", scale = 3)
	```