Skip to content

Instantly share code, notes, and snippets.

@ateucher
Last active March 25, 2020 05:41
Show Gist options
  • Save ateucher/c930dffde79c07b36726d4a9c019b18c to your computer and use it in GitHub Desktop.
Save ateucher/c930dffde79c07b36726d4a9c019b18c to your computer and use it in GitHub Desktop.
*.png
*.pdf
.DS_Store
.Rhistory
library(tidyverse)
library(gghighlight)
library(ggrepel)
library(scales)
# Turn inverted names of the form "Last, First" into "First Last".
#
# x: character vector of names.
# Returns a character vector the same length as `x`:
#   * exactly one comma ("Korea, South")  -> pieces swapped ("South Korea")
#   * no comma ("Canada") or NA           -> returned as split (unchanged)
#   * anything else (empty string, or two or more commas) -> returned
#     untouched, because swapping only the first two pieces would either
#     invent the text "NA NA" or silently drop the remaining pieces.
fix_name_comma <- function(x) {
  name_split <- strsplit(x, ",\\s?")
  n_parts <- lengths(name_split)

  fixed <- vapply(name_split, function(parts) {
    if (length(parts) == 1L) return(parts)
    if (length(parts) == 2L) return(paste(parts[2], parts[1]))
    NA_character_ # placeholder; replaced with the original value below
  }, FUN.VALUE = character(1))

  # strsplit("") yields character(0), and "A, B, C" yields three pieces;
  # neither is a simple inverted name, so keep the input as it was.
  untouched <- !(n_parts %in% c(1L, 2L))
  fixed[untouched] <- x[untouched]
  fixed
}
# Download the global confirmed-cases time series (one column per date).
data <- read_csv(
  "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)

# Reshape to long form: every date column becomes a (date, cases) row.
data_long <- data %>%
  pivot_longer(
    cols = `1/22/20`:last_col(),
    names_to = "date",
    values_to = "cases"
  ) %>%
  # Drop rows without a count.
  filter(!is.na(cases)) %>%
  # Drop "Princess" / "Recovered" province rows; rows with no province stay.
  filter(
    is.na(`Province/State`) |
      !grepl("[Pp]rincess|[Rr]ecovered", `Province/State`)
  ) %>%
  # Drop the cruise-ship pseudo-countries.
  filter(`Country/Region` != "Cruise Ship") %>%
  filter(`Country/Region` != "Diamond Princess") %>%
  # Column names such as "1/22/20" are month/day/two-digit-year.
  mutate(date = as.Date(date, format = "%m/%d/%y")) %>%
  # "Korea, South" -> "South Korea", etc.
  mutate(`Country/Region` = fix_name_comma(`Country/Region`))
# Most recent date in the data; used in the caption and in output file names.
max_date <- max(data_long[["date"]])

# Caption shared by every plot.
cap <- glue::glue("Data from https://github.com/CSSEGISandData/COVID-19, {max_date}")

# Total cases per country per date (sums over provinces/states).
data_by_country <- data_long %>%
  group_by(country = `Country/Region`, date) %>%
  summarise(cases = sum(cases, na.rm = TRUE))

# Countries drawn in colour and labelled in the plots.
highlight_countries <- c("Canada", "US", "Italy", "United Kingdom", "Spain", "Australia")
# Confirmed cases by calendar date for every country except China, on a log
# scale, with `highlight_countries` coloured and labelled at their latest date.
# The outer parentheses print the plot as well as assigning it.
(plot_countries_by_date <- filter(data_by_country, country != "China") %>%
  ggplot(aes(x = date, y = cases, colour = country)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_log10(expand = c(0, 0.5),
                labels = label_comma(accuracy = 1)) +
  geom_line(size = 1.5) +
  # Added after geom_line() and before the label layer: keep this order.
  gghighlight(country %in% highlight_countries,
              use_direct_label = FALSE,
              unhighlighted_params = list(size = 0.5)) +
  # format(..., scientific = FALSE) keeps round counts such as 100000 from
  # being pasted into the label as "1e+05".
  geom_text_repel(aes(label = paste0(country, " (",
                                     format(cases, scientific = FALSE,
                                            trim = TRUE),
                                     ")")),
                  data = filter(data_by_country,
                                country %in% highlight_countries,
                                date == max(date)),
                  nudge_x = 1, direction = "y",
                  segment.size = 0,
                  # Labels live in the three weeks to the right of the data.
                  xlim = c(max(data_by_country$date),
                           max(data_by_country$date) + 21)) +
  scale_color_brewer(palette = 2, type = "qual", guide = "none") +
  # clip = "off" lets the labels be drawn outside the panel.
  coord_cartesian(clip = "off") +
  theme_minimal() +
  # Wide right margin leaves room for the labels.
  theme(plot.margin = unit(c(0.5, 4, 0.5, 0.5), "cm")) +
  labs(x = "Date",
       y = "Total number of cases (log scale)",
       title = "COVID-19 Confirmed Cases",
       caption = cap))
# Threshold used to align countries on a common "day zero".
n_cases <- 100

# Keep only dates on which a country has at least `n_cases` cases, and count
# the days elapsed since that country's first such date.
data_by_country_n_days <- data_by_country %>%
  filter(cases >= n_cases) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(
    days_since_nth_case = as.numeric(
      difftime(date, min(date, na.rm = TRUE), units = "days")
    )
  ) %>%
  ungroup()
# Confirmed cases against days since each country reached `n_cases` cases
# (China excluded), on a log scale, with a selection of countries highlighted.
# The outer parentheses print the plot as well as assigning it.
(plot_100_cases <- filter(data_by_country_n_days, country != "China") %>%
  ggplot(aes(x = days_since_nth_case, y = cases, colour = country)) +
  scale_y_log10(labels = label_comma(accuracy = 1)) +
  geom_line(size = 1.5) +
  # Added after geom_line() and before the label layer: keep this order.
  gghighlight(country %in% c(highlight_countries, "South Korea", "Singapore", "Japan", "Iran"),
              use_direct_label = FALSE,
              unhighlighted_params = list(size = 0.5)) +
  # format(..., scientific = FALSE) keeps round counts such as 100000 from
  # being pasted into the label as "1e+05".
  geom_text_repel(aes(label = paste0(country, " (",
                                     format(cases, scientific = FALSE,
                                            trim = TRUE),
                                     ")")),
                  data = filter(data_by_country_n_days,
                                country %in% c(highlight_countries, "South Korea", "Singapore", "Japan", "Iran"),
                                date == max(date)),
                  nudge_x = 2, direction = "both",
                  segment.size = 0) +
  # clip = "off" lets the labels be drawn outside the panel.
  coord_cartesian(clip = "off") +
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  theme_minimal() +
  theme(plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")) +
  labs(x = glue::glue("Days since reaching {n_cases} confirmed cases"),
       y = "Total number of cases (log scale)",
       title = glue::glue("COVID-19 confirmed cases since reaching {n_cases} confirmed cases"),
       caption = cap))
# Keep only dates on which a country has at least one case, and count the
# days elapsed since that country's first such date.
data_by_country_1st_case <- data_by_country %>%
  filter(cases >= 1) %>%
  arrange(country, date) %>%
  group_by(country) %>%
  mutate(
    days_since_nth_case = as.numeric(
      difftime(date, min(date, na.rm = TRUE), units = "days")
    )
  ) %>%
  ungroup()
# Confirmed cases against days since each country's first confirmed case
# (China excluded), on a log scale, with a selection of countries highlighted.
# The outer parentheses print the plot as well as assigning it.
(plot_1st_cases <- filter(data_by_country_1st_case, country != "China") %>%
  ggplot(aes(x = days_since_nth_case, y = cases, colour = country)) +
  scale_y_log10(labels = label_comma(accuracy = 1)) +
  geom_line(size = 1.5) +
  # Added after geom_line() and before the label layer: keep this order.
  gghighlight(country %in% c(highlight_countries, "South Korea", "Singapore", "Japan", "Iran"),
              use_direct_label = FALSE,
              unhighlighted_params = list(size = 0.5)) +
  # format(..., scientific = FALSE) keeps round counts such as 100000 from
  # being pasted into the label as "1e+05".
  geom_text_repel(aes(label = paste0(country, " (",
                                     format(cases, scientific = FALSE,
                                            trim = TRUE),
                                     ")")),
                  data = filter(data_by_country_1st_case,
                                country %in% c(highlight_countries, "South Korea", "Singapore", "Japan", "Iran"),
                                date == max(date)),
                  nudge_x = 6, direction = "both",
                  segment.size = 0,
                  # Allow labels up to 20 days past the longest series.
                  xlim = c(min(data_by_country_1st_case$days_since_nth_case),
                           max(data_by_country_1st_case$days_since_nth_case) + 20)) +
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  # clip = "off" lets the labels be drawn outside the panel.
  coord_cartesian(clip = "off") +
  theme_minimal() +
  # Wide right margin leaves room for the labels.
  theme(plot.margin = unit(c(0.5, 4, 0.5, 0.5), "cm")) +
  labs(x = glue::glue("Days since first confirmed case"),
       y = "Total number of cases (log scale)",
       title = "COVID-19 confirmed cases since first confirmed case",
       caption = cap))
# Province-level rows for Canada, with shorter column names.
canada <- data_long %>%
  rename(
    country = `Country/Region`,
    province = `Province/State`
  ) %>%
  filter(country == "Canada")
# Confirmed cases by calendar date for each Canadian province, on a log scale,
# with every province labelled at its latest date.
# The outer parentheses print the plot as well as assigning it.
(plot_canada <- ggplot(canada, aes(x = date, y = cases, colour = province)) +
  geom_line(size = 1) +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %d") +
  scale_y_log10(expand = c(0, 1),
                labels = label_comma(accuracy = 1)) +
  # guide = "none" (not the deprecated guide = FALSE), matching the other plots.
  scale_color_brewer(palette = 3, type = "qual", guide = "none") +
  # clip = "off" lets the labels be drawn outside the panel.
  coord_cartesian(clip = "off") +
  # format(..., scientific = FALSE) keeps round counts such as 100000 from
  # being pasted into the label as "1e+05".
  geom_text_repel(aes(label = paste0(province, " (",
                                     format(cases, scientific = FALSE,
                                            trim = TRUE),
                                     ")")),
                  segment.size = 0,
                  data = filter(canada, date == max(date)),
                  nudge_x = 1, direction = "y",
                  # Labels live in the 30 days to the right of the data.
                  xlim = c(max(canada$date) + 1, max(canada$date) + 30)
  ) +
  theme_minimal() +
  # Wide right margin leaves room for the labels.
  theme(plot.margin = unit(c(0.5, 6, 0.5, 0.5), "cm")) +
  labs(x = "Date",
       y = "Total number of cases (log scale)",
       title = "COVID-19 Confirmed Cases in Canada",
       caption = cap))
# Write each plot to a PNG whose name carries the latest data date.
ggsave(
  filename = glue::glue("countries_by_date-{max_date}.png"),
  plot = plot_countries_by_date,
  width = 9,
  height = 6
)
ggsave(
  filename = glue::glue("100_cases-{max_date}.png"),
  plot = plot_100_cases,
  width = 9,
  height = 6
)
ggsave(
  filename = glue::glue("1st_cases-{max_date}.png"),
  plot = plot_1st_cases,
  width = 9,
  height = 6
)
ggsave(
  filename = glue::glue("canada-{max_date}.png"),
  plot = plot_canada,
  width = 9,
  height = 6
)
@andrew-edwards
Copy link

Thanks for sharing - I just downloaded it and it runs fine. I see you changed a couple of hardwired things - thanks. Just tried the log-axis for provinces - looking like exponential growth (though noisy).

@ateucher
Copy link
Author

Agreed - thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment