Created
March 28, 2018 18:29
-
-
Save potterzot/5475ca68d2b5428044a605ce70969ef9 to your computer and use it in GitHub Desktop.
Using tidync versus ncdf4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This documents the ease with which I used `tidync` to extract data from the drought index database [SPEI](http://spei.csic.es/database.html#p3) compared with using `ncdf4` directly. | |
### Setup necessary for both methods | |
```{r} | |
# Establish dimensions ------------------------------------------------------------------ | |
### Time limits. | |
time_first_idx <- as.integer(difftime(as.Date("1990-01-01"), as.Date("1900-01-01"))) + 15 | |
### Spatial limits. These are based on available SCAD data points for | |
# Africa and Latin America, but including the whole continent, so all of | |
# Mexico and south and then the continent of Africa. | |
# Latitude: -57.75 to 37.75 degrees north | |
# Longitude: -117.75 to 59.75 degrees east | |
box <- list(lat = c(-57.75, 37.75), lon = c(-117.75, 57.75)) | |
# Open the file for reading and grab just the data we want. -------------------- | |
ncfile <- paste0(raw, "spei01.nc") | |
``` | |
### Using tidync | |
```{r} | |
### Get the data for the given dimensions | |
d <- tidync(ncfile) %>% | |
hyper_filter( | |
lat = between(lat, box$lat[[1]], box$lat[[2]]), | |
lon = between(lon, box$lon[[1]], box$lon[[2]]), | |
time = time >= time_first_idx | |
) %>% hyper_tibble() %>% | |
mutate(date = as.Date(time, origin = as.Date("1900-01-01"))) %>% | |
select(-c(time)) | |
``` | |
### Using ncdf4 | |
```{r} | |
con <- nc_open(ncfile) | |
### Defining Dimensions | |
## Dates ("time") are "days since 1900-1-1". Probably this actually means "Days | |
# starting with 1900-1-1" | |
time_vals <- con$dim$time$vals | |
# We start with 1990 January | |
time_start <- as.integer(difftime(as.Date("1990-01-01"), as.Date("1900-01-01"))) + 15 | |
time_start_idx <- which(time_start == time_vals) | |
time_count <- length(time_vals) - time_start_idx | |
first_date <- as.Date(time_vals[time_start_idx], origin = as.Date("1900-01-01")) | |
last_date <- as.Date(time_vals[length(time_vals)], origin = as.Date("1900-01-01")) | |
## Spatial points are defined by a box | |
# Latitude ("lat") is "degrees_north" and runs from -89.75 to 89.75, having | |
# 360 values | |
lat_vals <- con$dim$lat$vals | |
# Longitude ("lon") is "degrees_east" and runs from -179.75 to 179.25, having | |
# 720 values. | |
lon_vals <- con$dim$lon$vals | |
# create a grid of all the points | |
latlons <- expand.grid(lon_vals, lat_vals) | |
### Get the data | |
# We cycle through each time period, converting the data to long format, then | |
# append it all. We also limit by lat/lon to keep just Africa and LA | |
dim_counts <- c(720, 360, 1) | |
d <- rbindlist(lapply(0:time_count, function(t) { | |
t2 <- time_start_idx + t # this iterations time index | |
#initial dimension, collecting just 1 time dimension | |
dim_starts <- c(1, 1, t2) | |
# bind the data with the lat/lon points attached, and drop any NAs | |
d <- cbind( | |
latlons, | |
matrix(ncvar_get(con, "spei", start = dim_starts, count = dim_counts), ncol = 1)) | |
d <- data.table(d) | |
setnames(d, c("lon", "lat", "spei")) | |
# Filter just the lat/lons within the box and values over land | |
d <- d[lat %between% box$lat & lon %between% box$lon & is.na(spei)] | |
# Exclude NAs | |
# We don't want to do this so that we maintain a full grid. | |
#d <- d[!is.na(spei)] | |
# set the date | |
today <- as.Date(time_vals[t2], origin = as.Date("1900-01-01")) | |
d[, date := today] | |
d | |
})) | |
``` | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment