Skip to content

Instantly share code, notes, and snippets.

@naupaka
Last active September 19, 2019 14:28
Show Gist options
  • Save naupaka/d9b003308e4aa66e34f93d492428e0a2 to your computer and use it in GitHub Desktop.
Save naupaka/d9b003308e4aa66e34f93d492428e0a2 to your computer and use it in GitHub Desktop.
# function to do a dodged half-boxplot and jittered points next to each other
#
# data_in should be a data frame
# factor_col should be a bare column name (not a string);
# the column itself may be stored as either a factor or a character vector
# numeric_col is the y axis continuous variable
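# offset is the horizontal shift applied to the boxplots (to the left) and to
# the jittered point cloud (to the right); each box is drawn 2 * offset wide
# and the points are jittered by 0.75 * offset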
#
# the basic approach is to draw a boxplot without the whiskers
# (i.e. only the interquartile range) and then use segments to add
# whiskers that reach the min and max values
#
# in part based on ideas here:
# https://groups.google.com/forum/#!topic/ggplot2/jAyiMHf7DPU
# http://ggplot2.tidyverse.org/reference/geom_boxplot.html
#### NOTE
# Only works with older versions of ggplot, e.g. this won't work with ggplot2 version 3+
# library("devtools")
# install_version("ggplot2", version = "2.2.1", repos = "http://cran.us.r-project.org")
library("rlang")
library("dplyr")
library("ggplot2")
# Draw, for every level of a grouping column, a half-boxplot with a jittered
# point cloud next to it.
#
# Arguments:
#   data_in      a data frame
#   factor_col   bare (unquoted) name of the grouping column in data_in;
#                the column may be a factor or a character vector
#   numeric_col  bare name of the continuous variable shown on the y axis
#   offset       a single number; each box is centred `offset` to the left of
#                its group position and is 2 * offset wide, the points are
#                centred `offset` to the right and jittered by 0.75 * offset
#
# Returns the ggplot object, so titles or further layers can be added with `+`.
#
# Missing values in numeric_col are ignored when the box statistics are
# computed (quantile() would otherwise stop with an error on the first NA).
gg_jitterbox <- function(data_in, factor_col, numeric_col, offset) {
  # turn bare args into quosures
  quo_factor <- enquo(factor_col)
  quo_numeric <- enquo(numeric_col)
  # quo_text() gives back, as a string, the column name that was passed as a
  # bare parameter
  factor_name <- quo_text(quo_factor)

  # fail early with a readable message instead of an obscure error raised
  # later from inside dplyr or ggplot2
  stopifnot(
    is.data.frame(data_in),
    is.numeric(offset),
    length(offset) == 1L
  )
  if (!factor_name %in% names(data_in)) {
    stop("column `", factor_name, "` not found in `data_in`", call. = FALSE)
  }

  # do the base R stuff that doesn't play nice with quosures
  # the factor() call deals with the factor_col column potentially being
  # character type - otherwise finding levels() etc. would fail; the factor is
  # built once and reused for the x positions, axis breaks and axis labels
  group_factor <- factor(data_in[[factor_name]])
  numeric_factor <- as.numeric(group_factor)
  ftr_labels <- levels(group_factor)
  # seq_along() stays empty when there are no levels; seq(length(.)) would
  # give c(1, 0)
  ftr_breaks <- seq_along(ftr_labels)

  # easiest to compute the per-group statistics with dplyr instead of pulling
  # them out of the ggplot object, because ggplot() is then only called once
  data_in %>%
    # another check to make sure we have factors when expected
    mutate_if(is.character, as.factor) %>%
    # !! unquotes quosures, but only works well with tidyverse
    group_by(!!quo_factor) %>%
    # mutate() (not summarise()) keeps one row per observation, so the same
    # data can feed both the box layers and the jittered points
    mutate(
      d_ymin = min(!!quo_numeric, na.rm = TRUE),
      d_ymax = max(!!quo_numeric, na.rm = TRUE),
      d_lower = quantile(!!quo_numeric, 0.25, na.rm = TRUE),
      d_middle = median(!!quo_numeric, na.rm = TRUE),
      d_upper = quantile(!!quo_numeric, 0.75, na.rm = TRUE)
    ) %>%
    # the grouping was only needed for the statistics above
    ungroup() %>%
    ggplot() +
    # aes_() requires quoted formula types, but allows mixing
    # of enquo-ed bare names (e.g. quo_factor) and just
    # normal dplyr-style column names
    # ymin/ymax are set to the quartiles so the box is drawn without whiskers
    geom_boxplot(
      aes_(
        x = ~numeric_factor - offset,
        ymin = ~d_lower,
        ymax = ~d_upper,
        lower = ~d_lower,
        middle = ~d_middle,
        upper = ~d_upper,
        width = 2 * offset,
        fill = quo_factor
      ),
      stat = "identity"
    ) +
    # raw observations, shifted to the right of the group position;
    # height = 0 so the y values are not perturbed
    geom_jitter(
      aes_(
        x = ~numeric_factor + offset,
        y = quo_numeric,
        color = quo_factor
      ),
      width = offset - 0.25 * offset,
      height = 0
    ) +
    # bottom vertical segment (min up to the lower quartile)
    geom_segment(aes(
      x = numeric_factor,
      y = d_ymin,
      xend = numeric_factor,
      yend = d_lower
    )) +
    # top vertical segment (max down to the upper quartile)
    geom_segment(aes(
      x = numeric_factor,
      y = d_ymax,
      xend = numeric_factor,
      yend = d_upper
    )) +
    # top horizontal segment (cap at the max)
    geom_segment(aes(
      x = numeric_factor - offset,
      y = d_ymax,
      xend = numeric_factor,
      yend = d_ymax
    )) +
    # bottom horizontal segment (cap at the min)
    geom_segment(aes(
      x = numeric_factor - offset,
      y = d_ymin,
      xend = numeric_factor,
      yend = d_ymin
    )) +
    # have to manually add in the x scale because we made everything numeric
    # to do the shifting
    scale_x_continuous(
      breaks = ftr_breaks,
      labels = ftr_labels
    ) +
    # this also needs to be added manually because of the change to numeric
    labs(x = factor_name)
}
# example: the grouping column (Species) is stored as a factor
gg_jitterbox(iris, Species, Sepal.Length, offset = 0.1) +
  ggtitle("Iris and some Quo-Fu")

# example: the grouping column (manufacturer) is stored as character
gg_jitterbox(mpg, manufacturer, hwy, offset = 0.2) +
  ggtitle("mpg and some Quo-Fu")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment