Skip to content

Instantly share code, notes, and snippets.

@tchakravarty
Last active September 7, 2016 18:46
Show Gist options
  • Save tchakravarty/a997ed47b5238d837e71ce0e26442b97 to your computer and use it in GitHub Desktop.
Save tchakravarty/a997ed47b5238d837e71ce0e26442b97 to your computer and use it in GitHub Desktop.
library(ggplot2)
library(dplyr)
library(scales)
# Simulate 100 observations of two categorical variables: `var1` is drawn
# from A-D and `var2` from a-d, each sampled with replacement.
# No seed is set here, so the data differ from run to run.
# `tibble()` is used because `data_frame()` is deprecated.
df_foo <- tibble(
  var1 = sample(LETTERS[1:4], size = 100, replace = TRUE),
  var2 = sample(letters[1:4], size = 100, replace = TRUE)
)
# stacked absolute bar chart
# One bar per `var1` level, split by `var2`; segment heights are row counts.
plot_stacked <- df_foo %>%
  ggplot(aes(x = var1, group = var2, fill = var2)) +
  geom_bar(stat = "count", position = "stack") +
  theme_minimal() +
  ggtitle("Stacked absolute bar chart")
# Assigning the plot suppresses top-level auto-printing, so show it explicitly.
print(plot_stacked)
# save the plot
# Create the output directory first so the save does not depend on it already
# existing, and pass the plot explicitly instead of relying on last_plot().
dir.create("results", showWarnings = FALSE, recursive = TRUE)
ggsave("results/stacked bar chart.png", plot = plot_stacked)
# spineplot (stacked relative bar chart + width)
## compute the contingency table
# Overwrites `df_foo` with one row per (var1, var2) cell:
#   cell_freq  - number of rows in the cell
#   var1_total - number of rows sharing the cell's var1 level
#   cell_prop  - cell_freq as a share of var1_total
# The per-var1 grouping is stated explicitly (rather than relying on
# summarise() dropping one grouping level) and removed again at the end so
# that later steps receive an ungrouped table.
df_foo <- df_foo %>%
  count(var1, var2) %>%
  rename(cell_freq = n) %>%
  group_by(var1) %>%
  mutate(
    var1_total = sum(cell_freq),
    cell_prop = cell_freq / var1_total
  ) %>%
  ungroup()
# Marginal distribution of var1: one row per var1 level with its total and
# the x coordinate at which its bar is centred.
# Bars are laid side by side with widths equal to var1_total, so each bar's
# right edge is the running total and its centre sits half a width to the
# left of that edge.
df_foo_var1 <- df_foo %>%
  ungroup() %>%
  distinct(var1, .keep_all = TRUE) %>%
  select(var1, var1_total) %>%
  mutate(position = cumsum(var1_total) - var1_total / 2)
# put the two datasets together
# Attach each var1 level's bar centre (`position`) to the cell-level table.
# The join keys are spelled out so the result does not depend on which
# columns the two tables happen to share, and no "Joining, by" message is
# emitted. Both shared columns are used as keys, which avoids duplicated
# var1_total.x / var1_total.y columns.
df_foo <- df_foo %>%
  inner_join(df_foo_var1, by = c("var1", "var1_total"))
# plot the data
# Spineplot: each var1 level is a bar centred at `position` with width
# `var1_total`; within a bar, the var2 segments stack by `cell_prop`.
# The x axis is continuous, so the var1 labels are placed manually at the
# bar centres.
plot_spine <- df_foo %>%
  ggplot(aes(x = position, y = cell_prop, fill = var2, width = var1_total)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = percent) +
  scale_x_continuous(
    labels = df_foo_var1$var1,
    breaks = df_foo_var1$position
  ) +
  theme_minimal() +
  ggtitle("Spineplot")
# Assigning the plot suppresses top-level auto-printing, so show it explicitly.
print(plot_spine)
# save the plot
# Create the output directory first so the save does not depend on it already
# existing, and pass the plot explicitly instead of relying on last_plot().
dir.create("results", showWarnings = FALSE, recursive = TRUE)
ggsave("results/spineplot.png", plot = plot_spine)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment