Skip to content

Instantly share code, notes, and snippets.

@jun9
Forked from hadley/points.r
Last active December 24, 2015 21:19
Show Gist options
  • Save jun9/6865057 to your computer and use it in GitHub Desktop.
Save jun9/6865057 to your computer and use it in GitHub Desktop.
library(ggplot2)
library(scales)
library(maps)
library(plyr)
# Midpoint of the span of `x`: the mean of its smallest and largest values,
# with NA entries ignored when finding those extremes.
mid_range <- function(x) {
  extremes <- range(x, na.rm = TRUE)
  mean(extremes)
}
# One row per (state, county): the mid-range of the outline's latitude and
# longitude serves as the point where the county is drawn.
# `county_df` and `unemp` are created by the data-preparation code later in
# this file.
centres <- ddply(
  .data = county_df,
  .variables = c("state", "county"),
  .fun = summarise,
  lat = mid_range(lat),
  long = mid_range(long)
)
# Attach the unemployment columns to each county centre.
bubbles <- merge(x = centres, y = unemp, by = c("state", "county"))
# Bubble map: state outlines with one semi-transparent point per county, the
# point size driven by the unemployment rate. `state_df` is created by the
# data-preparation code later in this file.
ggplot(bubbles, aes(long, lat)) +
  geom_polygon(aes(group = group), data = state_df,
    colour = "white", fill = NA) +
  geom_point(aes(size = rate), alpha = 1/2) +
  # scale_area() was deprecated in ggplot2 0.9.2 and later removed.
  # scale_size_area() is the documented replacement; it makes point area
  # proportional to the value, so a rate of 0 maps to a point of zero area.
  scale_size_area(breaks = c(5, 10, 20, 30))
# Bin the rate into right-closed intervals: steps of 2 up to 10, then a single
# wide bin ending at 35. Rates outside (0, 35] become NA.
bubbles[["rate_d"]] <- cut(
  bubbles[["rate"]],
  breaks = c(0, 2, 4, 6, 8, 10, 35)
)
# Same point map as before, but coloured by rate bin instead of sized by rate.
ggplot(data = bubbles, mapping = aes(x = long, y = lat)) +
  geom_polygon(
    mapping = aes(group = group),
    data = state_df,
    fill = NA,
    colour = "white"
  ) +
  geom_point(mapping = aes(colour = rate_d)) +
  scale_colour_brewer(palette = "PuRd")
library(ggplot2)
library(scales)
library(maps)
# First (and most annoying) task - get matching state and county variables
# for both datasets. And unfortunately it's not quite right, as you can
# see from the finished product - some counties are missing.
# Read the unemployment table. The CSV has no header row, so column names are
# assigned by hand; the three "?" columns are unidentified fields that the
# visible code never references.
unemp <- read.csv("unemployment09.csv", header = FALSE,
  stringsAsFactors = FALSE)
names(unemp) <- c("id", "state_fips", "county_fips", "name", "year",
  "?", "?", "?", "rate")
# Build the county join key from names such as "Autauga County, AL": drop the
# trailing ", XX" state code together with an optional " County" or " Parish"
# suffix, then strip full stops and apostrophes and lower-case the result.
# Previously only " County, XX" was removed, so parishes and other
# non-"County" names kept their state suffix and could never match.
# NOTE(review): assumes map_data("county") spells counties in lower case
# without punctuation (e.g. "st clair") - confirm against the maps package.
unemp$county <- sub("( County| Parish)?, [A-Z]{2}$", "", trimws(unemp$name))
unemp$county <- tolower(gsub("[.']", "", unemp$county))
# State key: the greedy leading `.*` means the last pair of consecutive
# capital letters in the name is captured; names without such a pair are left
# unchanged.
unemp$state <- gsub("^.*([A-Z]{2}).*$", "\\1", unemp$name)
# County outlines, with the columns renamed so the join keys are called
# `state_name` and `county`.
county_df <- setNames(
  map_data("county"),
  c("long", "lat", "group", "order", "state_name", "county")
)
# Translate lower-case state names into two-letter abbreviations via the
# built-in state.name / state.abb vectors; unmatched names give NA.
county_df[["state"]] <- state.abb[
  match(county_df[["state_name"]], tolower(state.name))
]
county_df[["state_name"]] <- NULL
# State outlines, drawn as borders on top of the county layer.
state_df <- map_data("state")
# Join the outlines to the unemployment figures on (state, county), then put
# the rows back into drawing order, since merge() does not keep the polygon
# vertex sequence.
choropleth <- merge(x = county_df, y = unemp, by = c("state", "county"))
draw_order <- order(choropleth[["order"]])
choropleth <- choropleth[draw_order, ]
# The Brewer palette needs a discrete variable, so bin the rate. Alternatives:
# choropleth$rate_d <- cut_number(choropleth$rate, 5)
# choropleth$rate_d <- cut_interval(choropleth$rate, 5)
# Nathan's (slightly odd) choice: steps of 2 up to 10, then one bin up to 35.
choropleth[["rate_d"]] <- cut(
  choropleth[["rate"]],
  breaks = c(0, 2, 4, 6, 8, 10, 35)
)
# With the data in the right shape, the plot itself is straightforward:
# counties filled by rate bin with thin, half-transparent white borders, and
# state outlines drawn on top.
ggplot(data = choropleth, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(
    mapping = aes(fill = rate_d),
    size = 0.2,
    colour = alpha("white", 0.5)
  ) +
  geom_polygon(data = state_df, fill = NA, colour = "white") +
  scale_fill_brewer(palette = "PuRd")
# Takes a while to draw because ggplot2 is not very efficient with large
# numbers of polygons :(
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment