Skip to content

Instantly share code, notes, and snippets.

@mhkeller
Forked from abelsonlive/gist:3751902
Created October 4, 2012 15:45
Show Gist options
  • Save mhkeller/3834498 to your computer and use it in GitHub Desktop.
Save mhkeller/3834498 to your computer and use it in GitHub Desktop.
google geocoding API, R
Forked from @BrianAbelson
library('rjson') # For parsing json in R
library('RCurl') # For sending an http request
library('plyr') # For ddply
# Takes a data frame with, at the very least, a unique id column (`uid`) and an address column (`loc`).
# It returns a data frame with the uid, lat, lng and specificity of geocoding
# use the uid to join these results back to your data if you want
# you can also accomplish roughly the same thing with a for loop instead of ddply
# in that case, your original data frame can have empty columns for lat, lng, and specificity
# which are filled as the loop cycles.
# ddply gives you a fancy progress bar, though.
# But you can print the index of the for loop so that's pretty much a progress bar as well
#' Geocode a single address with the Google Geocoding JSON endpoint.
#'
#' Meant to be called once per record (for example by `ddply`, one group per
#' `uid`).
#'
#' @param uid_query A one-row data frame (or list) with a `uid` element
#'   identifying the record and a `loc` element holding the address to look up.
#' @return A one-row data frame with columns `uid`, `lat`, `lng` and `type`
#'   (the API's `location_type`) when the API answers with status "OK".
#'   `NULL` when the request cannot be read, the response cannot be parsed, or
#'   the API reports any other status. Raises an error (via `stop()`) when the
#'   API reports "OVER_QUERY_LIMIT".
geocode.addr <- function(uid_query) {
  # Avoid rate limits by pausing from 1 to 3 seconds
  Sys.sleep(sample(seq(1, 3, by = 0.001), 1))

  # The important columns we want from our passed row
  uid <- uid_query$uid
  # as.character() so a factor address column is encoded as its label
  query <- as.character(uid_query$loc)

  # You can also return the addresses as a simplified csv with this
  # http://maps.google.com/maps/geo?output=csv&q=
  # or change that to output=xml which has more detailed results.
  # This JSON request has a lot of good detail if you need it and JSON is nicer
  # NOTE(review): Google may now require HTTPS and an API key for this
  # endpoint -- confirm against the current Geocoding API documentation.
  geo.url <- "http://maps.googleapis.com/maps/api/geocode/json?address="
  # reserved = TRUE also escapes characters such as "&" and "#", which would
  # otherwise terminate or split the address parameter.
  request <- paste0(geo.url, URLencode(query, reserved = TRUE), "&sensor=false")

  geo.text <- try(getURL(request))
  # If it didn't work with getURL, give it a go with readLines
  if (inherits(geo.text, "try-error")) {
    geo.text <- try(readLines(request))
  }
  # Give up on this record, but let the caller carry on with the others
  if (inherits(geo.text, "try-error")) {
    print(paste("having trouble reading this query:", uid))
    return(NULL)
  }

  # readLines() yields one element per line; the parser needs a single string
  geo.json <- try(fromJSON(paste(geo.text, collapse = "\n")), silent = TRUE)
  if (inherits(geo.json, "try-error") || !is.list(geo.json)) {
    print(paste("having trouble reading this query:", uid))
    return(NULL)
  }

  # identical() stays a clean FALSE even if the response has no status field
  status <- geo.json$status
  if (identical(status, "OVER_QUERY_LIMIT")) {
    stop(paste("Hit rate limit at:", uid))
  }
  if (!identical(status, "OK")) {
    return(NULL)
  }

  # There are other data points you can grab but I'm most interested in these.
  print(uid)
  best <- geo.json$results[[1]]$geometry
  data.frame(
    uid = uid,
    lat = best$location$lat,
    lng = best$location$lng,
    type = best$location_type,
    stringsAsFactors = FALSE
  )
}
# Geocode every uid group of `uid_address`, showing a text progress bar, and
# collect the per-record results into one data frame.
output <- ddply(
  .data = uid_address,
  .variables = .(uid),
  .fun = geocode.addr,
  .progress = "text"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment