Last active
September 7, 2016 18:46
-
-
Save tchakravarty/a997ed47b5238d837e71ce0e26442b97 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2)
library(dplyr)
library(scales)

# Fix the RNG seed so the simulated data (and therefore both saved plots)
# are reproducible from run to run.
set.seed(20160907)

# Toy data set: 100 observations of two categorical variables, each drawn
# with replacement from four levels (A-D for var1, a-d for var2).
# tibble() (re-exported by dplyr) replaces the deprecated data_frame().
df_foo <- tibble(
  var1 = sample(LETTERS[1:4], size = 100, replace = TRUE),
  var2 = sample(letters[1:4], size = 100, replace = TRUE)
)
# stacked absolute bar chart
# One bar per var1 level; each bar is split by var2 and its height is the raw
# observation count. The plot is kept in a variable so it can be displayed
# and handed to ggsave() explicitly instead of relying on last_plot().
plot_stacked <- df_foo %>%
  ggplot(aes(x = var1, group = var2, fill = var2)) +
  geom_bar(stat = "count", position = "stack") +
  theme_minimal() +
  ggtitle("Stacked absolute bar chart")
print(plot_stacked)

# save the plot
# ggsave() fails when the target directory is missing, so create it first
# (a no-op when it already exists).
dir.create("results", showWarnings = FALSE, recursive = TRUE)
ggsave("results/stacked bar chart.png", plot = plot_stacked)
# spineplot (stacked relative bar chart + width)
## compute the contingency table
# One row per (var1, var2) cell:
#   cell_freq  - number of observations in the cell
#   var1_total - number of observations sharing the cell's var1 level
#   cell_prop  - share of that var1 level falling into the cell
# count() returns an ungrouped table, so the per-var1 grouping needed for the
# totals is spelled out here rather than inherited from summarise() peeling
# off one grouping level, and it is dropped again once the totals are done.
df_foo <- df_foo %>%
  count(var1, var2, name = "cell_freq") %>%
  group_by(var1) %>%
  mutate(
    var1_total = sum(cell_freq),
    cell_prop = cell_freq / var1_total
  ) %>%
  ungroup()
# Marginal distribution of var1: one row per var1 level holding its total
# count and the x coordinate of its bar centre. The bars are drawn with widths
# equal to var1_total and laid end to end, so a bar's centre is the running
# total up to and including that level minus half of the level's own total.
df_foo_var1 <- df_foo %>%
  ungroup() %>%
  distinct(var1, var1_total) %>%
  mutate(position = cumsum(var1_total) - var1_total / 2)
# put the two datasets together
# Attach each var1 level's bar centre (`position`) to its cells. The join keys
# are named explicitly: they are the two columns the tables share, i.e. what
# the implicit natural join matched on, but stated here so the join emits no
# message and cannot silently change keys if either table gains a column.
df_foo <- df_foo %>%
  inner_join(df_foo_var1, by = c("var1", "var1_total"))
# plot the data
# Each var1 level gets one bar centred at `position` whose width is the
# level's total count; within a bar the var2 proportions are stacked. The x
# axis is continuous, so the var1 labels are placed at the bar centres.
# The plot is kept in a variable so it can be displayed and passed to
# ggsave() explicitly instead of relying on last_plot().
plot_spine <- df_foo %>%
  ggplot(aes(x = position, y = cell_prop, fill = var2, width = var1_total)) +
  geom_col() +
  scale_y_continuous(labels = percent) +
  scale_x_continuous(
    labels = df_foo_var1$var1,
    breaks = df_foo_var1$position
  ) +
  theme_minimal() +
  ggtitle("Spineplot")
print(plot_spine)

# save the plot
# ggsave() fails when the target directory is missing, so create it first
# (a no-op when it already exists).
dir.create("results", showWarnings = FALSE, recursive = TRUE)
ggsave("results/spineplot.png", plot = plot_spine)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment