|
---
title: "R Notebook"
output: html_notebook
---
|
|
|
```{r}
|
# Packages ----
library(tidyverse)
library(widyr)    # pairwise_dist()
library(cowplot)  # theme_cowplot(), save_plot()
library(ggthemes)

# Make the cowplot theme the default for every ggplot built below.
theme_set(theme_cowplot())

# Input data ----
# Menu table: a `pizzeria` id column plus one presence column per pizza.
pizzeria_menus <- read_csv(file = "pizzeria_menus.csv")

# Map table: `pizzeria`, `pope`, and further columns that the code below
# treats as coordinates.
pizzeria_map <- read_csv(
  file = "pizzeria_coordinates_and_pope_preferences.csv"
)

# Long table of pairwise similarities (pizzeria_1, pizzeria_2, item,
# similarity). This file is written by the simulation chunk further down in
# this notebook, so that chunk must have been run at least once.
pizzeria_item_similarities <- read_csv(
  file = "pizzeria_menu_item_similarities.csv"
)
|
|
|
# Lollipop chart of how often each pizza appears on a menu, with the most
# frequent pizza at the top of the axis.
p1 <- pizzeria_menus %>%
  # Long format: one row per (pizzeria, pizza). pivot_longer() replaces the
  # superseded gather().
  pivot_longer(-pizzeria, names_to = "pizza", values_to = "presence") %>%
  group_by(pizza) %>%
  # mean(presence) equals sum(presence) / n(); assumes presence is coded 0/1.
  summarise(proportion = mean(presence)) %>%
  # Order the discrete axis by frequency instead of alphabetically.
  mutate(pizza = fct_reorder(pizza, proportion)) %>%
  ggplot(aes(y = pizza, x = proportion)) +
  # Stem of the lollipop, from 0 to the observed proportion.
  geom_segment(aes(x = 0, xend = proportion, y = pizza, yend = pizza)) +
  geom_point() +
  scale_y_discrete("Pizza Type") +
  scale_x_continuous("Frequency on menu")
|
|
|
# Scatter plot of pairwise item similarity against geographical distance, one
# facet per menu item, coloured by the pope both pizzerias share (if any).
p2 <- pizzeria_item_similarities %>%
  # Attach the distance between the two pizzerias of each pair. The distance
  # is computed from every column of pizzeria_map except `pizzeria` and `pope`.
  left_join(
    pizzeria_map %>%
      select(-pope) %>%
      pivot_longer(
        -pizzeria,
        names_to = "coordinate",
        values_to = "value"
      ) %>%
      pairwise_dist(pizzeria, feature = coordinate, value = value) %>%
      select(pizzeria_1 = item1, pizzeria_2 = item2, distance),
    by = c("pizzeria_1", "pizzeria_2")
  ) %>%
  # Look up the pope associated with each side of the pair.
  left_join(
    select(pizzeria_map, pizzeria_1 = pizzeria, pope_1 = pope),
    by = "pizzeria_1"
  ) %>%
  left_join(
    select(pizzeria_map, pizzeria_2 = pizzeria, pope_2 = pope),
    by = "pizzeria_2"
  ) %>%
  # Both popes must be known, equal, and not the "none" placeholder.
  # The is.na() guards come first so the condition is never NA: without the
  # pope_2 guard, a missing pope_2 produced an NA level instead of "n/a".
  mutate(
    `Pope in common` = if_else(
      !is.na(pope_1) & !is.na(pope_2) & pope_1 == pope_2 & pope_1 != "none",
      pope_1,
      "n/a"
    )
  ) %>%
  # Fix the legend order, with the "n/a" group first.
  mutate(
    `Pope in common` = fct_relevel(
      `Pope in common`,
      "n/a", "Benedict XVI", "Francis", "John Paul II"
    )
  ) %>%
  ggplot(aes(x = distance, y = similarity)) +
  geom_point(aes(color = `Pope in common`), alpha = 0.8, size = 0.5) +
  # Optional per-item trend line, currently disabled:
  # stat_smooth(aes(color = item), method = "lm") +
  facet_wrap(~item) +
  scale_x_continuous(
    "Pairwise Geographical Distance",
    breaks = c(100, 300, 500)
  ) +
  scale_y_continuous("Pairwise Item Similarity") +
  # Colours are keyed by level so that a pope missing from the data cannot
  # shift the remaining colours onto the wrong group.
  scale_color_manual(
    values = c(
      "n/a" = "#999999",
      "Benedict XVI" = "#E69F00",
      "Francis" = "#0072B2",
      "John Paul II" = "#009E73"
    )
  ) +
  # Empty main title; the text is shown as the subtitle.
  ggtitle(label = "", subtitle = "Pizza Similarity by Geographical Distance") +
  theme(strip.text = element_text(size = 8))
|
|
|
|
|
# Show the similarity plot inline in the notebook output.
p2

# Write both figures to PNG files in the working directory.
save_plot(
  filename = "frequency_of_pizza.png",
  plot = p1,
  base_width = 3,
  base_height = 2
)
save_plot(
  filename = "pizza_similarity_by_distance.png",
  plot = p2,
  base_width = 3.5,
  base_height = 2.5
)
|
```
|
|
|
```{r}
|
# simulating similarities with pope effect

# Pope-specific favourites. `love` is the bonus added on the logit scale to
# the similarity of `item` when both pizzerias of a pair share that `pope`
# (see the similarity formula in the simulation further down).
papal_effect <- tibble(
  pope = c("John Paul II", "Benedict XVI", "Benedict XVI", "Francis"),
  item = c("mortadella", "diavola", "salsiccia", "bistecca"),
  love = c(2, 1, 1, 0.6)
)
|
|
|
# Menu items used in the simulation. `sacred` is the baseline similarity of an
# item on the probability scale; the simulation passes it through qlogis()
# before adding the distance, noise and pope terms.
menu_items <- tibble(
  item = c(
    "marinara",
    "margherita",
    "diavola",
    "speck",
    "salsiccia",
    "flori_di_zucca",
    "fungo",
    "rapini",
    "bistecca",
    "mortadella"
  ),
  sacred = c(0.95, 0.95, 0.7, 0.75, 0.75, 0.6, 0.75, 0.95, 0.75, 0.6)
)
|
|
|
# Every pizzeria pair returned by pairwise_dist() crossed with every menu
# item: one row per (pizzeria_1, pizzeria_2, item).
pairwise_menu_items <- pizzeria_map %>%
  select(-pope) %>%
  pivot_longer(-pizzeria, names_to = "coordinate", values_to = "value") %>%
  # pairwise_dist() is used here only to enumerate the pairs; the distance
  # column is dropped immediately.
  pairwise_dist(pizzeria, feature = coordinate, value = value) %>%
  select(pizzeria_1 = item1, pizzeria_2 = item2) %>%
  # A length-one list column is recycled to every row and then expanded by
  # name. This replaces rep(<tibble>, n()), which only worked by treating a
  # data frame as a list, and the deprecated column-less unnest().
  mutate(item = list(menu_items$item)) %>%
  unnest(item)
|
|
|
# Fixed seed so the simulated noise below is reproducible.
set.seed(42)

# Simulate a similarity score for every (pizzeria pair, menu item):
#   logit(similarity) = logit(sacred) - scaled distance + noise + pope bonus
# Pairs where the item is not on both menus get NA.
pizzeria_item_similarities <- pizzeria_map %>%
  select(-pope) %>%
  pivot_longer(-pizzeria, names_to = "coordinate", values_to = "value") %>%
  # upper = FALSE asks pairwise_dist() for only one triangle of the distance
  # matrix, rather than both orderings of each pair.
  pairwise_dist(
    pizzeria,
    feature = coordinate,
    value = value,
    upper = FALSE
  ) %>%
  select(pizzeria_1 = item1, pizzeria_2 = item2, distance) %>%
  # Scale distance so the largest observed distance has an effect of 1.
  mutate(dist_effect = distance / max(distance)) %>%
  # Join keys are spelled out so that a new column in any lookup table cannot
  # silently become part of a join.
  left_join(pairwise_menu_items, by = c("pizzeria_1", "pizzeria_2")) %>%
  left_join(menu_items, by = "item") %>%
  left_join(
    select(pizzeria_map, pizzeria_1 = pizzeria, pope_1 = pope),
    by = "pizzeria_1"
  ) %>%
  left_join(
    select(pizzeria_map, pizzeria_2 = pizzeria, pope_2 = pope),
    by = "pizzeria_2"
  ) %>%
  left_join(
    select(papal_effect, pope_1 = pope, item, love_1 = love),
    by = c("pope_1", "item")
  ) %>%
  left_join(
    select(papal_effect, pope_2 = pope, item, love_2 = love),
    by = c("pope_2", "item")
  ) %>%
  # The pope bonus applies only when both sides have a favourite entry for
  # this item and it is the same pope; otherwise the bonus is 0.
  mutate(
    similarity = plogis(
      qlogis(sacred) - dist_effect + rnorm(n(), 0, 0.2) +
        if_else(
          !is.na(love_1) & !is.na(love_2) & pope_1 == pope_2,
          love_1,
          0
        )
    )
  ) %>%
  # A pizzeria compared with itself is perfectly similar.
  # NOTE(review): pairwise_dist() is called without diag = TRUE, so self-pairs
  # presumably never reach this step -- confirm whether they were intended.
  mutate(similarity = if_else(pizzeria_1 == pizzeria_2, 1, similarity)) %>%
  # Bring in whether each pizzeria actually has the item on its menu.
  left_join(
    pizzeria_menus %>%
      pivot_longer(-pizzeria, names_to = "item", values_to = "presence_1"),
    by = c("pizzeria_1" = "pizzeria", "item" = "item")
  ) %>%
  left_join(
    pizzeria_menus %>%
      pivot_longer(-pizzeria, names_to = "item", values_to = "presence_2"),
    by = c("pizzeria_2" = "pizzeria", "item" = "item")
  ) %>%
  # Similarity is only defined when both pizzerias serve the item.
  mutate(
    similarity = if_else(
      presence_1 == 1 & presence_2 == 1,
      similarity,
      NA_real_
    )
  ) %>%
  select(pizzeria_1, pizzeria_2, pope_1, pope_2, item, similarity)
|
|
|
# Save the simulated similarities without the pope columns. The plotting chunk
# at the top of this notebook reads this file back in.
pizzeria_item_similarities %>%
  select(pizzeria_1, pizzeria_2, item, similarity) %>%
  write_csv("pizzeria_menu_item_similarities.csv")

# Eyeball check: marinara similarities laid out as a wide table, one row per
# pizzeria_1 and one column per pizzeria_2. pivot_wider() replaces the
# superseded spread(); names_sort = TRUE keeps the columns in sorted order.
pizzeria_item_similarities %>%
  filter(item == "marinara") %>%
  select(pizzeria_1, pizzeria_2, similarity) %>%
  pivot_wider(
    names_from = pizzeria_2,
    values_from = similarity,
    names_sort = TRUE
  )

# Eyeball check: the marinara rows that involve only P01 and P02.
pizzeria_item_similarities %>%
  filter(
    pizzeria_1 %in% c("P01", "P02"),
    pizzeria_2 %in% c("P01", "P02"),
    item == "marinara"
  )
|
```