potterzot · October 11, 2019 19:04
diff --git a/dc.R b/dc.R
 ################################################################
 # EVERYTHING HERE IS THE CODE THE LESSON IS BASED OFF OF
 # IN THE NEXT SECTION IS CODE ACTUALLY WRITTEN DURING THE LESSON
 ################################################################

 install.packages("tidyverse")
 install.packages(c("here", "reprex"))

 ### Resources
 # Stack overflow: stackoverflow.com
 # Jenny Bryan (@jennybryan) and the #rstats hashtag on twitter
 # Advanced R book (for all levels): https://adv-r.hadley.nz/
 # Geocomputation for R book: https://geocompr.robinlovelace.net/


 ### R Package examples
 # https://github.com/ropensci/rnoaa
 # https://github.com/tidyverse/reprex
 # https://cran.r-project.org/web/packages/rnassqs/index.html

 ### Remember these shortcuts:
 # CTRL+Enter: runs the code block
 # CTRL+1: Switch to script window
 # CTRL+2: Switch to console window


 #### Load libraries ----
 library(tidyverse)
 library(reprex)
 library(here)


 # Setup a project (usually you've already done this)
 # 1. Create a project in Rstudio
 # Create the project folders. Folders that already exist are skipped so the
 # script can be re-run without "already exists" warnings from dir.create().
 for (project_dir in c("data_raw", "data", "fig", "src")) {
   if (!dir.exists(project_dir)) {
     dir.create(project_dir)
   }
 }

  
 #### Download and load data ----
 # Download the raw survey data only when it is not on disk yet. The same
 # here()-resolved path is used for the check and for the download so both
 # refer to one file whatever the current working directory is.
 rodent_counts_path <- here("data_raw/rodent_counts.csv")
 if (!file.exists(rodent_counts_path)) {
   download.file(url = "https://ndownloader.figshare.com/files/2292169",
                 destfile = rodent_counts_path)
 }

 # using rodent_counts
 surveys <- read_csv(here("data_raw/rodent_counts.csv"))
 # read.csv
 # read.table
 # read_
 # help(read.csv)

 #### dplyr verbs ----

 ## Selecting
 # Keep only the plot, species and weight columns, then preview the object
 # that was just created.
 sel_surveys <- select(surveys, plot_id, species_id, weight)
 head(sel_surveys)

 ## Filtering
 range(surveys$year)
 fil_surveys <- filter(surveys, year == 1995)

 ## Pipes

 # EXERCISE: How would you filter weight < 5 and include species_id, sex, and weight?
 # Nested form: the inner filter() is evaluated first and its result is
 # passed to select().
 surveys_sml <- select(filter(surveys, weight < 5), species_id, sex, weight)

 # Piping is more readable: each step feeds its result into the next one.
 # Here the columns are selected before the rows are filtered; weight is one
 # of the selected columns, so the filter can still use it.
 surveys_sml <- surveys %>%
  select(species_id, sex, weight) %>%
  filter(weight < 5)

 ## Mutate
 # Add the weight divided by 1000 as a new column (printed, not stored).
 mutate(surveys, weight_kg = weight / 1000)

 # A column created earlier in the same mutate() call can be reused.
 mutate(
   surveys,
   weight_kg = weight / 1000,
   weight_lb = weight_kg * 2.2
 )

 # Drop records without a weight before converting; show the first rows.
 filter(surveys, !is.na(weight)) %>%
   mutate(weight_kg = weight / 1000) %>%
   head()

 # EXERCISE: Create a new data frame that meets the following criteria: 
 # 1. contains only the species_id column and a new column called hindfoot_half
 # 2. hindfoot_half is half of hindfoot_length values
 # 3. There are no NA values and all values are less than 30.
 # Build the two requested columns in one transmute() call, then remove rows
 # whose halved value is missing or not below 30.
 surveys_hindfoot_half <- surveys %>%
   transmute(species_id, hindfoot_half = hindfoot_length / 2) %>%
   filter(!is.na(hindfoot_half), hindfoot_half < 30)

 ## Split-Apply-Combine
 # Average weight for each sex; na.rm = TRUE ignores missing weights.
 group_by(surveys, sex) %>%
   summarise(mean_weight = mean(weight, na.rm = TRUE))

 # Average weight for each sex/species_id combination.
 group_by(surveys, sex, species_id) %>%
   summarise(mean_weight = mean(weight, na.rm = TRUE))

 # Remove records without a weight first, so groups made up only of missing
 # weights do not appear in the result.
 filter(surveys, !is.na(weight)) %>%
   group_by(sex, species_id) %>%
   summarise(mean_weight = mean(weight, na.rm = TRUE))

 # Several statistics can be computed in one summarise() call.
 filter(surveys, !is.na(weight)) %>%
   group_by(sex, species_id) %>%
   summarise(
     mean_weight = mean(weight),
     min_weight = min(weight)
   )

 # Order the summary rows by the smallest weight in each group.
 filter(surveys, !is.na(weight)) %>%
   group_by(sex, species_id) %>%
   summarise(
     mean_weight = mean(weight),
     min_weight = min(weight)
   ) %>%
   arrange(min_weight)

 ## count(), n(), N(), row_number()
 # Number of records for each sex, largest count first.
 surveys %>%
  count(sex, sort = TRUE)

 # Number of records for each species/sex group, largest count first.
 surveys %>%
  group_by(species, sex) %>%
  count(sort = TRUE)

 # First record of every species/sex group (row_number() counts within the
 # group).
 surveys %>%
  group_by(species, sex) %>%
  filter(row_number() == 1)

 # Last record of every group: store the group size in n_grp, then keep the
 # row whose position equals it.
 surveys %>%
  group_by(species, sex) %>%
  mutate(n_grp = n()) %>%
  filter(row_number() == n_grp) %>%
  head()

 # Same idea without the helper column, this time grouped by species and
 # plot_id.
 surveys %>%
  group_by(species, plot_id) %>%
  filter(row_number() == n()) %>%
  head()



 # EXERCISE: How would you select the record with the group max weight?
 # 1. Group by sex, species
 # 2. For each group, select the observation with the maximum
 # 3. Sort by species and sex
 # 4. return just species, sex, and weight
 # Approach 1: keep every row whose weight equals its group's maximum. Rows
 # tied for the maximum are all kept.
 surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species) %>%
  filter(weight == max(weight)) %>%
  select(species, sex, weight) %>%
  arrange(species, sex)

 # Approach 2: sort by ascending weight and keep the last row of each group,
 # which gives exactly one row per group.
 surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species) %>%
  arrange(weight) %>%
  filter(row_number() == n()) %>%
  select(species, sex, weight) %>%
  arrange(species, sex)

 # Approach 3: sort by descending weight and keep the first row of each
 # group.
 surveys %>%
  filter(!is.na(weight)) %>%
  group_by(sex, species) %>%
  arrange(desc(weight)) %>%
  filter(row_number() == 1) %>%
  select(species, sex, weight) %>%
  arrange(species, sex)



 ## Joins and mutate_at, mutate_if
 # left_join: keeps all records of the first data.frame
 # right_join: keeps all records of the second data.frame
 # inner_join: keeps records that are in both
 # full_join: keeps all records from either
 # Mean weight and hindfoot length per species/sex group (NAs ignored).
 survey_means <- summarise(
   group_by(surveys, species, sex),
   weight_mean = mean(weight, na.rm = TRUE),
   hindfoot_mean = mean(hindfoot_length, na.rm = TRUE)
 )

 # Standard deviations for the same groups.
 survey_sd <- summarise(
   group_by(surveys, species, sex),
   weight_sd = sd(weight, na.rm = TRUE),
   hindfoot_sd = sd(hindfoot_length, na.rm = TRUE)
 )

 # Attach each summary table to the individual records.
 survey_join1 <- surveys %>%
   left_join(survey_means, by = c("species", "sex"))
 survey_join2 <- surveys %>%
   left_join(survey_sd, by = c("species", "sex"))

 # Reduce() applies left_join() across the list from left to right, so all
 # three tables are combined in one call.
 Reduce(f = left_join, x = list(surveys, survey_means, survey_sd))

 # Exercise: How could you do this in one call with mutate?
 # Add the group statistics next to every record in a single mutate() call:
 # the group mean and sd are repeated on each row of the group, and the
 # *_dev columns hold each record's deviation from its group mean.
 surveys %>%
   filter(!is.na(weight)) %>%
   group_by(species, sex) %>%
   mutate(weight_mean = mean(weight, na.rm = TRUE),
          weight_sd = sd(weight),
          weight_dev = weight - weight_mean,
          hindfoot_mean = mean(hindfoot_length, na.rm = TRUE),
          hindfoot_sd = sd(hindfoot_length, na.rm = TRUE),
          hindfoot_dev = hindfoot_length - hindfoot_mean) %>%
   select(species, sex,
          weight, weight_dev, weight_mean,
          hindfoot_length, hindfoot_dev, hindfoot_mean)
         


 ## Tidy data:
 # Each variable has its own column
 # Each observation has its own row
 # Each value must have its own cell
 # Each type of observational unit forms a table
 # Mean weight of each genus within each plot, in long form (one row per
 # genus/plot pair).
 surveys_gw <- surveys %>%
   filter(!is.na(weight)) %>%
   group_by(genus, plot_id) %>%
   summarize(mean_weight = mean(weight))

 ## Spread
 # https://datacarpentry.org/R-ecology-lesson/img/spread_data_R.png
 help(spread)

 # One column per genus; preview the first rows.
 surveys_gw %>%
   spread(genus, mean_weight) %>%
   head()

 # fill = 0 puts 0 in cells for genus/plot pairs that have no row in
 # surveys_gw. Preview the object that was just created.
 surveys_wide <- surveys_gw %>%
   spread(key = genus, value = mean_weight, fill = 0)
 head(surveys_wide)

 ## Gather
 # https://datacarpentry.org/R-ecology-lesson/img/gather_data_R.png
 # Collapse the genus columns back into genus/mean_weight pairs; -plot_id
 # excludes plot_id from the gathering so it stays as its own column.
 gather(surveys_wide, key = genus, value = mean_weight, -plot_id)

 # Same reshaping, stored for later use.
 surveys_long <- gather(
   surveys_wide,
   key = genus, value = mean_weight, -plot_id
 )


 #########################################
 # CODE ACTUALLY WRITTEN DURING THE LESSON
 #########################################

 install.packages("tidyverse")
 install.packages(c("here", "reprex"))

 ### Resources
 # Stack overflow: stackoverflow.com
 # Jenny Bryan (@jennybryan) and the #rstats hashtag on twitter
 # Advanced R book (for all levels): https://adv-r.hadley.nz/
 # Geocomputation for R book: https://geocompr.robinlovelace.net/


 ### R Package examples
 # https://github.com/ropensci/rnoaa
 # https://github.com/tidyverse/reprex
 # https://cran.r-project.org/web/packages/rnassqs/index.html

 #### Setup and load libraries ----
 install.packages(c("here", "reprex"))

 library(tidyverse)
 library(here)
 library(reprex)
 library(purrr)
 library(forcats)

 # Create each project folder only if it is missing, so every folder is
 # handled the same way and re-running the script raises no warnings.
 for (project_dir in c("data_raw", "data", "fig", "src")) {
   if (!dir.exists(project_dir)) {
     dir.create(project_dir)
   }
 }


 #### Load and setup data ----
 # Fetch the raw data only when it is not on disk yet, and use one
 # here()-resolved path for both downloading and reading so they always
 # refer to the same file.
 rodent_counts_path <- here("data_raw/rodent_counts.csv")
 if (!file.exists(rodent_counts_path)) {
   download.file(url = "https://ndownloader.figshare.com/files/2292169",
                 destfile = rodent_counts_path)
 }

 surveys <- read_csv(rodent_counts_path)

 # Number of rows and columns.
 dim(surveys)

 # First few rows.
 head(surveys)

 # Class of the object returned by read_csv().
 class(surveys)

 # Typing the name prints the object.
 surveys

 # Read the same file with base R's read.csv() and compare the class.
 surveys_alt <- read.csv("data_raw/rodent_counts.csv")
 class(surveys_alt)

 ## Using dplyr
 # Base-style indexing: rows 1 to 3 of the first column.
 surveys[1:3, 1]

 # select() picks columns by position or by name.
 select(surveys, 1)
 select(surveys, record_id, year)

 # A minus sign in select() drops the named columns.
 head(surveys)
 select(surveys, -genus, -species)

 # Base-style row subsetting with a logical condition ...
 surveys[surveys$year < 1995,]

 # ... and the same kind of row subsetting written with filter().
 filter(surveys, year < 1995)
 filter(surveys, !is.na(weight))

 ## Exercise: Filter weight less than 5 and include species_id, sex, and weight
 # Solution 1: two separate steps with an intermediate object.
 filteredWeight <- filter(surveys,weight<5)
 select(filteredWeight,species_id,sex,weight)

 # Solution 2: the filtered table is piped straight into select().
 filter(surveys, weight < 5) %>%
  select(species_id, sex, weight)

 # Solution 3: fully piped, starting from the data.
 surveys %>%
  filter(weight < 5) %>%
  select(species_id, sex, weight)

 ## Mutate / transmute
 # mutate() adds the new columns to the existing ones, so select() is used
 # afterwards to narrow the output.
 surveys %>%
   filter(!is.na(weight)) %>%
   mutate(
     weight_kg = weight / 1000,
     weight_lb = weight_kg * 2.2
   ) %>%
   select(record_id, weight, weight_kg, weight_lb)

 # transmute() returns only the columns named in the call.
 surveys %>%
   filter(!is.na(weight)) %>%
   transmute(
     record_id,
     weight_kg = weight / 1000,
     weight_lb = weight_kg * 2.2
   )

 ## Exercise
 # 1. contains only the species_id column and a new column hindfoot_half
 # 2. hindfoot_half = hindfoot_length / 2
 # 3. values of hindfoot_half < 30 and are not NA
 # Variant 1: build both columns with transmute(), then filter on the new
 # column. A filter() with two conditions keeps rows that meet both.
 surveys %>%
  transmute(species_id,
            hindfoot_half=hindfoot_length/2) %>%
  filter(hindfoot_half < 30, !is.na(hindfoot_half))

 # Variant 2: same as variant 1 with the two conditions in the other order.
 surveys %>% 
  transmute(species_id,
            hindfoot_half = hindfoot_length / 2) %>% 
  filter(!is.na(hindfoot_half), 
         hindfoot_half < 30)

 # Variant 3: filter on the original column before halving;
 # hindfoot_length < 60 is the same condition as hindfoot_half < 30.
 surveys %>% 
  filter(!is.na(hindfoot_length)) %>% 
  filter(hindfoot_length<60) %>% 
  transmute(species_id,
            hindfoot_half=hindfoot_length/2)

 # Variant 4: mutate() keeps every column, so a final select() narrows the
 # result to the two requested columns.
 surveys %>% 
  mutate(hindfoot_half = hindfoot_length / 2) %>%
  filter(hindfoot_half < 30) %>%
  filter(!is.na(hindfoot_length)) %>%
  select(species_id, hindfoot_half)

 # mutate() keeps the existing columns, so species_id and hindfoot_length
 # are still available for select(). With transmute() they would already
 # have been dropped and select() would fail.
 surveys %>%
   filter(!is.na(hindfoot_length) & hindfoot_length < 60) %>%
   mutate(hindfoot_half = hindfoot_length / 2) %>%
   select(species_id, hindfoot_length, hindfoot_half)





 # All original columns plus the halved hindfoot length.
 mutate(surveys, hindfoot_half = hindfoot_length / 2)

 ## Grouping variables and applying
 ## Split-Apply-Combine
 names(surveys)

 # Mean weight of each species_id/sex group, ignoring records with no weight.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   summarise(weight = mean(weight))

 # Records of species "AB", reduced to three columns.
 ab <- filter(surveys, species_id == "AB") %>%
   select(species_id, sex, weight)

 # Mean, standard deviation, minimum and group size in one summary.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   summarise(
     weight_mean = mean(weight),
     weight_sd = sd(weight),
     weight_min = min(weight),
     grp_n = n()
   )

 # Inside a grouped filter(), min() is evaluated per group, so this keeps
 # the lightest record(s) of every species_id/sex group.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   filter(weight == min(weight))

 # The same query, run a second time.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   filter(weight == min(weight))

 # Group means sorted from heaviest to lightest.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   summarise(weight_mean = mean(weight)) %>%
   arrange(desc(weight_mean))

 # Last row of every group.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   filter(row_number() == n())

 ## Exercise:
 # Select the row with the maximum weight for each species_id and sex
 # Sort by weight, then keep the last row of every species_id/sex group,
 # i.e. the heaviest record of that group. Every step must be connected
 # with %>% so that filter() receives the sorted data.
 surveys %>%
   filter(!is.na(weight)) %>%
   group_by(species_id, sex) %>%
   arrange(weight) %>%
   filter(row_number() == n())

 # Three-row data frame for checking the sort-then-pick logic by hand.
 x <- data.frame(
   record_id = c("a", "b", "c"),
   weight = c(1, 4.5, 3)
 )

 x

 # Ascending sort: the heaviest record ends up in the last row.
 filter(arrange(x, weight), row_number() == n())

 # Descending sort: the heaviest record is the first row.
 filter(arrange(x, desc(weight)), row_number() == 1)


 # Keep the rows at each group's maximum weight, then take the first of
 # them so that ties still give one row per group.
 filter(surveys, !is.na(weight)) %>%
   group_by(species_id, sex) %>%
   filter(weight == max(weight)) %>%
   filter(row_number() == 1)

 # Alternative: also drop records without a sex, sort from heaviest to
 # lightest and keep the first row of each group.
 surveys %>%
   filter(!is.na(weight)) %>%
   filter(!is.na(sex)) %>%
   group_by(species_id, sex) %>%
   arrange(desc(weight)) %>%
   filter(row_number() == 1)

 ## Means and standard deviations by group (species_id, plot_id)
 # Mean weight of every species_id/plot_id group.
 gm <- filter(surveys, !is.na(weight)) %>%
   group_by(species_id, plot_id) %>%
   summarise(weight_gm = mean(weight))

 # Keep a copy without missing weights so the is.na() filter does not have
 # to be repeated below.
 surveys_w_weight <- filter(surveys, !is.na(weight))

 # Each record's deviation from its group mean; ungroup() removes the
 # grouping from the result.
 gd <- group_by(surveys_w_weight, species_id, plot_id) %>%
   transmute(record_id, weight_gd = weight - mean(weight)) %>%
   ungroup()
 head(gd)

 ## Merging data
 # left_join(x,y, by = <some variable>) # keeps all records from x
 # right_join(x,y, by = ...) #keeps all records from y and only matching from x
 # inner_join(x,y, by = ...) #keeps only records that matched from both
 # full_join(x,y, by = ...) # Keeps all records from x and all from y
 # Add each record's group mean by matching on species_id and plot_id.
 # Because of the final head(), surveys2 holds only the first few rows.
 surveys2 <- surveys %>%
   left_join(gm, by = c("species_id", "plot_id")) %>%
   select(record_id, species_id, plot_id, weight, weight_gm) %>%
   head()

 # species_id and plot_id are dropped from gd before joining, so the match
 # is made on record_id alone.
 surveys3 <- surveys2 %>%
   left_join(select(gd, -species_id, -plot_id), by = "record_id")
 head(surveys3)

 # Exercise
 # 1. Create a data set like surveys but with the two additional variables:
 #    a. weight_gm
 #    b. weight_gd
 # Both variables in one grouped mutate(): the group mean, and each
 # record's deviation from it.
 group_by(surveys_w_weight, species_id, plot_id) %>%
   mutate(
     weight_gm = mean(weight),
     weight_gd = weight - weight_gm
   ) %>%
   select(
     record_id, species_id, plot_id,
     weight, weight_gm, weight_gd
   ) %>%
   head()

 head(surveys3)

 ## One more thing on merging
 # Reduce() applies left_join() across the list from left to right.
 surveys3a <- Reduce(f = left_join, x = list(surveys, gm, gd))

 ## Tidy Data
 # Each variable has its own column
 # Each observation has its own row
 # Each value has its own cell
 head(surveys)

 # Mean weight per genus and plot, in long form.
 surveys_gm <- summarise(
   group_by(surveys_w_weight, genus, plot_id),
   weight_mean = mean(weight)
 )

 surveys_gm

 ## Long to wide
 # Each genus becomes a column holding its mean weight.
 surveys_wide <- spread(surveys_gm, key = genus, value = weight_mean)

 ## Wide to long
 # Gather every column except plot_id back into genus/weight_mean pairs.
 surveys_long <- gather(
   surveys_wide,
   key = genus, value = weight_mean, -plot_id
 )

 head(surveys_long)
 head(surveys_gm)

 # Exercise: Transform to wide so that each year has its own column
 # Mean weight per plot and year, reshaped so that every year is a column.
 surveys_year <- surveys %>%
   select(plot_id, year, weight) %>%
   filter(!is.na(weight)) %>%
   group_by(plot_id, year) %>%
   summarise(weight_mean = mean(weight)) %>%
   spread(key = year, value = weight_mean)

 ## Save your output!!!!
 # Write the wide table into the data folder.
 write_csv(surveys_year, "data/surveys_year.csv")
	################################################################
	# EVERYTHING HERE IS THE CODE THE LESSON IS BASED OFF OF
	# IN THE NEXT SECTION IS CODE ACTUALLY WRITTEN DURING THE LESSON
	################################################################

	install.packages("tidyverse")
	install.packages(c("here", "reprex"))

	### Resources
	# Stack overflow: stackoverflow.com
	# Jenny Bryan (@jennybryan) and the #rstats hashtag on twitter
	# Advanced R book (for all levels): https://adv-r.hadley.nz/
	# Geocomputation for R book: https://geocompr.robinlovelace.net/


	### R Package examples
	# https://github.com/ropensci/rnoaa
	# https://github.com/tidyverse/reprex
	# https://cran.r-project.org/web/packages/rnassqs/index.html

	### Remember these shortcuts:
	# CTRL+Enter: runs the code block
	# CTRL+1: Switch to script window
	# CTRL+2: Switch to console window


	#### Load libraries ----
	library(tidyverse)
	library(reprex)
	library(here)


	# Setup a project (usually you've already done this)
	# 1. Create a project in Rstudio
	# Create the project folders. Folders that already exist are skipped so the
	# script can be re-run without "already exists" warnings from dir.create().
	for (project_dir in c("data_raw", "data", "fig", "src")) {
	  if (!dir.exists(project_dir)) {
	    dir.create(project_dir)
	  }
	}


	#### Download and load data ----
	# Download the raw survey data only when it is not on disk yet. The same
	# here()-resolved path is used for the check and for the download so both
	# refer to one file whatever the current working directory is.
	rodent_counts_path <- here("data_raw/rodent_counts.csv")
	if (!file.exists(rodent_counts_path)) {
	  download.file(url = "https://ndownloader.figshare.com/files/2292169",
	                destfile = rodent_counts_path)
	}

	# using rodent_counts
	surveys <- read_csv(here("data_raw/rodent_counts.csv"))
	# read.csv
	# read.table
	# read_
	# help(read.csv)

	#### dplyr verbs ----

	## Selecting
	# Keep only the plot, species and weight columns, then preview the object
	# that was just created.
	sel_surveys <- select(surveys, plot_id, species_id, weight)
	head(sel_surveys)

	## Filtering
	range(surveys$year)
	fil_surveys <- filter(surveys, year == 1995)

	## Pipes

	# EXERCISE: How would you filter weight < 5 and include species_id, sex, and weight?
	# Nested form: the inner filter() is evaluated first and its result is
	# passed to select().
	surveys_sml <- select(filter(surveys, weight < 5), species_id, sex, weight)

	# Piping is more readable: each step feeds its result into the next one.
	# Here the columns are selected before the rows are filtered; weight is one
	# of the selected columns, so the filter can still use it.
	surveys_sml <- surveys %>%
	select(species_id, sex, weight) %>%
	filter(weight < 5)

	## Mutate
	# Add the weight divided by 1000 as a new column (printed, not stored).
	mutate(surveys, weight_kg = weight / 1000)

	# A column created earlier in the same mutate() call can be reused.
	mutate(
	  surveys,
	  weight_kg = weight / 1000,
	  weight_lb = weight_kg * 2.2
	)

	# Drop records without a weight before converting; show the first rows.
	filter(surveys, !is.na(weight)) %>%
	  mutate(weight_kg = weight / 1000) %>%
	  head()

	# EXERCISE: Create a new data frame that meets the following criteria:
	# 1. contains only the species_id column and a new column called hindfoot_half
	# 2. hindfoot_half is half of hindfoot_length values
	# 3. There are no NA values and all values are less than 30.
	# Build the two requested columns in one transmute() call, then remove rows
	# whose halved value is missing or not below 30.
	surveys_hindfoot_half <- surveys %>%
	  transmute(species_id, hindfoot_half = hindfoot_length / 2) %>%
	  filter(!is.na(hindfoot_half), hindfoot_half < 30)

	## Split-Apply-Combine
	# Average weight for each sex; na.rm = TRUE ignores missing weights.
	group_by(surveys, sex) %>%
	  summarise(mean_weight = mean(weight, na.rm = TRUE))

	# Average weight for each sex/species_id combination.
	group_by(surveys, sex, species_id) %>%
	  summarise(mean_weight = mean(weight, na.rm = TRUE))

	# Remove records without a weight first, so groups made up only of missing
	# weights do not appear in the result.
	filter(surveys, !is.na(weight)) %>%
	  group_by(sex, species_id) %>%
	  summarise(mean_weight = mean(weight, na.rm = TRUE))

	# Several statistics can be computed in one summarise() call.
	filter(surveys, !is.na(weight)) %>%
	  group_by(sex, species_id) %>%
	  summarise(
	    mean_weight = mean(weight),
	    min_weight = min(weight)
	  )

	# Order the summary rows by the smallest weight in each group.
	filter(surveys, !is.na(weight)) %>%
	  group_by(sex, species_id) %>%
	  summarise(
	    mean_weight = mean(weight),
	    min_weight = min(weight)
	  ) %>%
	  arrange(min_weight)

	## count(), n(), N(), row_number()
	# Number of records for each sex, largest count first.
	surveys %>%
	count(sex, sort = TRUE)

	# Number of records for each species/sex group, largest count first.
	surveys %>%
	group_by(species, sex) %>%
	count(sort = TRUE)

	# First record of every species/sex group (row_number() counts within the
	# group).
	surveys %>%
	group_by(species, sex) %>%
	filter(row_number() == 1)

	# Last record of every group: store the group size in n_grp, then keep the
	# row whose position equals it.
	surveys %>%
	group_by(species, sex) %>%
	mutate(n_grp = n()) %>%
	filter(row_number() == n_grp) %>%
	head()

	# Same idea without the helper column, this time grouped by species and
	# plot_id.
	surveys %>%
	group_by(species, plot_id) %>%
	filter(row_number() == n()) %>%
	head()



	# EXERCISE: How would you select the record with the group max weight?
	# 1. Group by sex, species
	# 2. For each group, select the observation with the maximum
	# 3. Sort by species and sex
	# 4. return just species, sex, and weight
	# Approach 1: keep every row whose weight equals its group's maximum. Rows
	# tied for the maximum are all kept.
	surveys %>%
	filter(!is.na(weight)) %>%
	group_by(sex, species) %>%
	filter(weight == max(weight)) %>%
	select(species, sex, weight) %>%
	arrange(species, sex)

	# Approach 2: sort by ascending weight and keep the last row of each group,
	# which gives exactly one row per group.
	surveys %>%
	filter(!is.na(weight)) %>%
	group_by(sex, species) %>%
	arrange(weight) %>%
	filter(row_number() == n()) %>%
	select(species, sex, weight) %>%
	arrange(species, sex)

	# Approach 3: sort by descending weight and keep the first row of each
	# group.
	surveys %>%
	filter(!is.na(weight)) %>%
	group_by(sex, species) %>%
	arrange(desc(weight)) %>%
	filter(row_number() == 1) %>%
	select(species, sex, weight) %>%
	arrange(species, sex)



	## Joins and mutate_at, mutate_if
	# left_join: keeps all records of the first data.frame
	# right_join: keeps all records of the second data.frame
	# inner_join: keeps records that are in both
	# full_join: keeps all records from either
	# Mean weight and hindfoot length per species/sex group (NAs ignored).
	survey_means <- summarise(
	  group_by(surveys, species, sex),
	  weight_mean = mean(weight, na.rm = TRUE),
	  hindfoot_mean = mean(hindfoot_length, na.rm = TRUE)
	)

	# Standard deviations for the same groups.
	survey_sd <- summarise(
	  group_by(surveys, species, sex),
	  weight_sd = sd(weight, na.rm = TRUE),
	  hindfoot_sd = sd(hindfoot_length, na.rm = TRUE)
	)

	# Attach each summary table to the individual records.
	survey_join1 <- surveys %>%
	  left_join(survey_means, by = c("species", "sex"))
	survey_join2 <- surveys %>%
	  left_join(survey_sd, by = c("species", "sex"))

	# Reduce() applies left_join() across the list from left to right, so all
	# three tables are combined in one call.
	Reduce(f = left_join, x = list(surveys, survey_means, survey_sd))

	# Exercise: How could you do this in one call with mutate?
	# Add the group statistics next to every record in a single mutate() call:
	# the group mean and sd are repeated on each row of the group, and the
	# *_dev columns hold each record's deviation from its group mean.
	surveys %>%
	  filter(!is.na(weight)) %>%
	  group_by(species, sex) %>%
	  mutate(weight_mean = mean(weight, na.rm = TRUE),
	         weight_sd = sd(weight),
	         weight_dev = weight - weight_mean,
	         hindfoot_mean = mean(hindfoot_length, na.rm = TRUE),
	         hindfoot_sd = sd(hindfoot_length, na.rm = TRUE),
	         hindfoot_dev = hindfoot_length - hindfoot_mean) %>%
	  select(species, sex,
	         weight, weight_dev, weight_mean,
	         hindfoot_length, hindfoot_dev, hindfoot_mean)



	## Tidy data:
	# Each variable has its own column
	# Each observation has its own row
	# Each value must have its own cell
	# Each type of observational unit forms a table
	# Mean weight of each genus within each plot, in long form (one row per
	# genus/plot pair).
	surveys_gw <- surveys %>%
	  filter(!is.na(weight)) %>%
	  group_by(genus, plot_id) %>%
	  summarize(mean_weight = mean(weight))

	## Spread
	# https://datacarpentry.org/R-ecology-lesson/img/spread_data_R.png
	help(spread)

	# One column per genus; preview the first rows.
	surveys_gw %>%
	  spread(genus, mean_weight) %>%
	  head()

	# fill = 0 puts 0 in cells for genus/plot pairs that have no row in
	# surveys_gw. Preview the object that was just created.
	surveys_wide <- surveys_gw %>%
	  spread(key = genus, value = mean_weight, fill = 0)
	head(surveys_wide)

	## Gather
	# https://datacarpentry.org/R-ecology-lesson/img/gather_data_R.png
	# Collapse the genus columns back into genus/mean_weight pairs; -plot_id
	# excludes plot_id from the gathering so it stays as its own column.
	gather(surveys_wide, key = genus, value = mean_weight, -plot_id)

	# Same reshaping, stored for later use.
	surveys_long <- gather(
	  surveys_wide,
	  key = genus, value = mean_weight, -plot_id
	)


	#########################################
	# CODE ACTUALLY WRITTEN DURING THE LESSON
	#########################################

	install.packages("tidyverse")
	install.packages(c("here", "reprex"))

	### Resources
	# Stack overflow: stackoverflow.com
	# Jenny Bryan (@jennybryan) and the #rstats hashtag on twitter
	# Advanced R book (for all levels): https://adv-r.hadley.nz/
	# Geocomputation for R book: https://geocompr.robinlovelace.net/


	### R Package examples
	# https://github.com/ropensci/rnoaa
	# https://github.com/tidyverse/reprex
	# https://cran.r-project.org/web/packages/rnassqs/index.html

	#### Setup and load libraries ----
	install.packages(c("here", "reprex"))

	library(tidyverse)
	library(here)
	library(reprex)
	library(purrr)
	library(forcats)

	# Create each project folder only if it is missing, so every folder is
	# handled the same way and re-running the script raises no warnings.
	for (project_dir in c("data_raw", "data", "fig", "src")) {
	  if (!dir.exists(project_dir)) {
	    dir.create(project_dir)
	  }
	}


	#### Load and setup data ----
	# Fetch the raw data only when it is not on disk yet, and use one
	# here()-resolved path for both downloading and reading so they always
	# refer to the same file.
	rodent_counts_path <- here("data_raw/rodent_counts.csv")
	if (!file.exists(rodent_counts_path)) {
	  download.file(url = "https://ndownloader.figshare.com/files/2292169",
	                destfile = rodent_counts_path)
	}

	surveys <- read_csv(rodent_counts_path)

	# Number of rows and columns.
	dim(surveys)

	# First few rows.
	head(surveys)

	# Class of the object returned by read_csv().
	class(surveys)

	# Typing the name prints the object.
	surveys

	# Read the same file with base R's read.csv() and compare the class.
	surveys_alt <- read.csv("data_raw/rodent_counts.csv")
	class(surveys_alt)

	## Using dplyr
	# Base-style indexing: rows 1 to 3 of the first column.
	surveys[1:3, 1]

	# select() picks columns by position or by name.
	select(surveys, 1)
	select(surveys, record_id, year)

	# A minus sign in select() drops the named columns.
	head(surveys)
	select(surveys, -genus, -species)

	# Base-style row subsetting with a logical condition ...
	surveys[surveys$year < 1995,]

	# ... and the same kind of row subsetting written with filter().
	filter(surveys, year < 1995)
	filter(surveys, !is.na(weight))

	## Exercise: Filter weight less than 5 and include species_id, sex, and weight
	# Solution 1: two separate steps with an intermediate object.
	filteredWeight <- filter(surveys,weight<5)
	select(filteredWeight,species_id,sex,weight)

	# Solution 2: the filtered table is piped straight into select().
	filter(surveys, weight < 5) %>%
	select(species_id, sex, weight)

	# Solution 3: fully piped, starting from the data.
	surveys %>%
	filter(weight < 5) %>%
	select(species_id, sex, weight)

	## Mutate / transmute
	# mutate() adds the new columns to the existing ones, so select() is used
	# afterwards to narrow the output.
	surveys %>%
	  filter(!is.na(weight)) %>%
	  mutate(
	    weight_kg = weight / 1000,
	    weight_lb = weight_kg * 2.2
	  ) %>%
	  select(record_id, weight, weight_kg, weight_lb)

	# transmute() returns only the columns named in the call.
	surveys %>%
	  filter(!is.na(weight)) %>%
	  transmute(
	    record_id,
	    weight_kg = weight / 1000,
	    weight_lb = weight_kg * 2.2
	  )

	## Exercise
	# 1. contains only the species_id column and a new column hindfoot_half
	# 2. hindfoot_half = hindfoot_length / 2
	# 3. values of hindfoot_half < 30 and are not NA
	# Variant 1: build both columns with transmute(), then filter on the new
	# column. A filter() with two conditions keeps rows that meet both.
	surveys %>%
	transmute(species_id,
	hindfoot_half=hindfoot_length/2) %>%
	filter(hindfoot_half < 30, !is.na(hindfoot_half))

	# Variant 2: same as variant 1 with the two conditions in the other order.
	surveys %>%
	transmute(species_id,
	hindfoot_half = hindfoot_length / 2) %>%
	filter(!is.na(hindfoot_half),
	hindfoot_half < 30)

	# Variant 3: filter on the original column before halving;
	# hindfoot_length < 60 is the same condition as hindfoot_half < 30.
	surveys %>%
	filter(!is.na(hindfoot_length)) %>%
	filter(hindfoot_length<60) %>%
	transmute(species_id,
	hindfoot_half=hindfoot_length/2)

	# Variant 4: mutate() keeps every column, so a final select() narrows the
	# result to the two requested columns.
	surveys %>%
	mutate(hindfoot_half = hindfoot_length / 2) %>%
	filter(hindfoot_half < 30) %>%
	filter(!is.na(hindfoot_length)) %>%
	select(species_id, hindfoot_half)

	# mutate() keeps the existing columns, so species_id and hindfoot_length
	# are still available for select(). With transmute() they would already
	# have been dropped and select() would fail.
	surveys %>%
	  filter(!is.na(hindfoot_length) & hindfoot_length < 60) %>%
	  mutate(hindfoot_half = hindfoot_length / 2) %>%
	  select(species_id, hindfoot_length, hindfoot_half)





	# All original columns plus the halved hindfoot length.
	mutate(surveys, hindfoot_half = hindfoot_length / 2)

	## Grouping variables and applying
	## Split-Apply-Combine
	names(surveys)

	# Mean weight of each species_id/sex group, ignoring records with no weight.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  summarise(weight = mean(weight))

	# Records of species "AB", reduced to three columns.
	ab <- filter(surveys, species_id == "AB") %>%
	  select(species_id, sex, weight)

	# Mean, standard deviation, minimum and group size in one summary.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  summarise(
	    weight_mean = mean(weight),
	    weight_sd = sd(weight),
	    weight_min = min(weight),
	    grp_n = n()
	  )

	# Inside a grouped filter(), min() is evaluated per group, so this keeps
	# the lightest record(s) of every species_id/sex group.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  filter(weight == min(weight))

	# The same query, run a second time.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  filter(weight == min(weight))

	# Group means sorted from heaviest to lightest.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  summarise(weight_mean = mean(weight)) %>%
	  arrange(desc(weight_mean))

	# Last row of every group.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  filter(row_number() == n())

	## Exercise:
	# Select the row with the maximum weight for each species_id and sex
	# Sort by weight, then keep the last row of every species_id/sex group,
	# i.e. the heaviest record of that group. Every step must be connected
	# with %>% so that filter() receives the sorted data.
	surveys %>%
	  filter(!is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  arrange(weight) %>%
	  filter(row_number() == n())

	# Three-row data frame for checking the sort-then-pick logic by hand.
	x <- data.frame(
	  record_id = c("a", "b", "c"),
	  weight = c(1, 4.5, 3)
	)

	x

	# Ascending sort: the heaviest record ends up in the last row.
	filter(arrange(x, weight), row_number() == n())

	# Descending sort: the heaviest record is the first row.
	filter(arrange(x, desc(weight)), row_number() == 1)


	# Keep the rows at each group's maximum weight, then take the first of
	# them so that ties still give one row per group.
	filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, sex) %>%
	  filter(weight == max(weight)) %>%
	  filter(row_number() == 1)

	# Alternative: also drop records without a sex, sort from heaviest to
	# lightest and keep the first row of each group.
	surveys %>%
	  filter(!is.na(weight)) %>%
	  filter(!is.na(sex)) %>%
	  group_by(species_id, sex) %>%
	  arrange(desc(weight)) %>%
	  filter(row_number() == 1)

	## Means and standard deviations by group (species_id, plot_id)
	# Mean weight of every species_id/plot_id group.
	gm <- filter(surveys, !is.na(weight)) %>%
	  group_by(species_id, plot_id) %>%
	  summarise(weight_gm = mean(weight))

	# Keep a copy without missing weights so the is.na() filter does not have
	# to be repeated below.
	surveys_w_weight <- filter(surveys, !is.na(weight))

	# Each record's deviation from its group mean; ungroup() removes the
	# grouping from the result.
	gd <- group_by(surveys_w_weight, species_id, plot_id) %>%
	  transmute(record_id, weight_gd = weight - mean(weight)) %>%
	  ungroup()
	head(gd)

	## Merging data
	# left_join(x,y, by = <some variable>) # keeps all records from x
	# right_join(x,y, by = ...) #keeps all records from y and only matching from x
	# inner_join(x,y, by = ...) #keeps only records that matched from both
	# full_join(x,y, by = ...) # Keeps all records from x and all from y
	# Add each record's group mean by matching on species_id and plot_id.
	# Because of the final head(), surveys2 holds only the first few rows.
	surveys2 <- surveys %>%
	  left_join(gm, by = c("species_id", "plot_id")) %>%
	  select(record_id, species_id, plot_id, weight, weight_gm) %>%
	  head()

	# species_id and plot_id are dropped from gd before joining, so the match
	# is made on record_id alone.
	surveys3 <- surveys2 %>%
	  left_join(select(gd, -species_id, -plot_id), by = "record_id")
	head(surveys3)

	# Exercise
	# 1. Create a data set like surveys but with the two additional variables:
	# a. weight_gm
	# b. weight_gd
	# Both variables in one grouped mutate(): the group mean, and each
	# record's deviation from it.
	group_by(surveys_w_weight, species_id, plot_id) %>%
	  mutate(
	    weight_gm = mean(weight),
	    weight_gd = weight - weight_gm
	  ) %>%
	  select(
	    record_id, species_id, plot_id,
	    weight, weight_gm, weight_gd
	  ) %>%
	  head()

	head(surveys3)

	## One more thing on merging
	# Reduce() applies left_join() across the list from left to right.
	surveys3a <- Reduce(f = left_join, x = list(surveys, gm, gd))

	## Tidy Data
	# Each variable has its own column
	# Each observation has its own row
	# Each value has its own cell
	head(surveys)

	# Mean weight per genus and plot, in long form.
	surveys_gm <- summarise(
	  group_by(surveys_w_weight, genus, plot_id),
	  weight_mean = mean(weight)
	)

	surveys_gm

	## Long to wide
	# Each genus becomes a column holding its mean weight.
	surveys_wide <- spread(surveys_gm, key = genus, value = weight_mean)

	## Wide to long
	# Gather every column except plot_id back into genus/weight_mean pairs.
	surveys_long <- gather(
	  surveys_wide,
	  key = genus, value = weight_mean, -plot_id
	)

	head(surveys_long)
	head(surveys_gm)

	# Exercise: Transform to wide so that each year has its own column
	# Mean weight per plot and year, reshaped so that every year is a column.
	surveys_year <- surveys %>%
	  select(plot_id, year, weight) %>%
	  filter(!is.na(weight)) %>%
	  group_by(plot_id, year) %>%
	  summarise(weight_mean = mean(weight)) %>%
	  spread(key = year, value = weight_mean)

	## Save your output!!!!
	# Write the wide table into the data folder.
	write_csv(surveys_year, "data/surveys_year.csv")