Created
December 11, 2020 21:13
-
-
Save mtreg/add9ecc2cfe50d8b60194969dc8e2c9c to your computer and use it in GitHub Desktop.
R code that aggregates data from different gdb files that is thematically the same, based on a prefix of layer names (defined by user), and standardizes to a user-defined coordinate reference system
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################ | |
#' Compile layers from different gdb files with same, specified prefix, and desired coordinate system
#'
#' Compiles layers with a standard prefix from multiple gdb files (found in a
#' user-specified folder) into a single sf object.
#' This was originally developed to get respective layers from separate
#' geodatabases that resulted from running the ACPF toolbox into single layers
#' (e.g., all stream reach datasets into one layer, rather than spread across
#' many gdb files) but may have broader applicability.
#' (ACPF stands for Agricultural Conservation Planning Framework; see
#' https://acpf4watersheds.org/toolbox/)
#'
#' The handling of different projections is currently fairly crude - every
#' layer is reprojected to the desired projection (even if it is already in
#' that projection), though that step is generally fairly quick.
#'
#' @param folder Folder with gdb files of interest. Only entries directly
#'   inside this folder whose names end in `.gdb` are considered.
#' @param prefix Common prefix for layers to combine. It is compared literally
#'   against the start of each layer name (it is not a regular expression).
#' @param CRS Coordinate Reference System to project data to; passed as the
#'   `crs` argument of `sf::st_transform()`. If EPSG code, integer without
#'   quotes will work, but if proj4 code, use quotes.
#' @return sf object containing the rows of every matching layer, row-bound
#'   together after reprojection to `CRS`. An error is raised if the folder
#'   holds no gdb files, if no layer name starts with `prefix`, or if the sf
#'   package is not installed.
#' @author Mike Treglia \email{michael.treglia@@tnc.org}
#' @examples
#' \dontrun{
#' ## Example assuming multiple gdb files are in D:/gdb_testing, and StreamReach layers are desired
#' sr <- gdbs_compile_layers(folder = "D:/gdb_testing/", prefix = "StreamReach", CRS = 2263)
#'
#' # Users can easily explore results using the mapview package (not recommended for huge datasets)
#' # mapview::mapview(sr)
#'
#' ## Users can write out results to file - e.g., using st_write
#' # sf::st_write(sr, dsn = "D:/gdb_testing/StreamReach_Layers_Compiled.gpkg",
#' #              layer = "StreamReach_Layers_Compiled")
#' }
gdbs_compile_layers <- function(folder, prefix, CRS) {
  stopifnot(
    is.character(folder), length(folder) == 1L,
    is.character(prefix), length(prefix) == 1L
  )

  ## Paths of the gdb datasets in `folder`. `pattern` is a regular expression
  ## (not a glob), so anchor on a literal ".gdb" suffix.
  gdb_paths <- list.files(
    path = folder,
    pattern = "\\.gdb$",
    full.names = TRUE
  )
  if (length(gdb_paths) == 0L) {
    stop("No .gdb files found in '", folder, "'.", call. = FALSE)
  }

  ## Fail with a clear error (rather than require()'s silent FALSE) when the
  ## sf package is unavailable.
  if (!requireNamespace("sf", quietly = TRUE)) {
    stop("Package 'sf' is required but is not installed.", call. = FALSE)
  }

  ## One data frame per gdb, with a column of gdb path ('gdb') and a column of
  ## layer name ('layer'); the layer listing is queried once per gdb.
  layer_tables <- lapply(gdb_paths, function(gdb) {
    layer_names <- sf::st_layers(gdb)$name
    data.frame(
      gdb = rep(gdb, length(layer_names)),
      layer = layer_names,
      stringsAsFactors = FALSE
    )
  })

  ## Single data frame of all gdb paths and their layers
  gdb_layers <- do.call(rbind, layer_tables)

  ## Keep only the rows whose layer name begins with the desired prefix
  wanted <- gdb_layers[startsWith(gdb_layers$layer, prefix), , drop = FALSE]
  if (nrow(wanted) == 0L) {
    stop(
      "No layers beginning with '", prefix, "' were found in the .gdb files in '",
      folder, "'.",
      call. = FALSE
    )
  }

  ## Read every matching layer (one per row of `wanted`, which need not equal
  ## the number of gdb files) and reproject it to the requested CRS.
  layers <- lapply(seq_len(nrow(wanted)), function(i) {
    layer_sf <- sf::st_read(
      dsn = wanted$gdb[i],
      layer = wanted$layer[i],
      stringsAsFactors = FALSE,
      quiet = TRUE
    )
    sf::st_transform(layer_sf, crs = CRS)
  })

  ## Put together full dataset
  compiled <- do.call(rbind, layers)

  message(paste0(
    "layers beginning with '", prefix,
    "' were compiled with the following CRS: '", CRS, "'."
  ))

  compiled
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment