Skip to content

Instantly share code, notes, and snippets.

@djvanderlaan
Created December 8, 2013 13:45
Show Gist options
  • Save djvanderlaan/7857708 to your computer and use it in GitHub Desktop.
Save djvanderlaan/7857708 to your computer and use it in GitHub Desktop.
One to one matching using similarity measure
library(lpSolve)
pairs_select <- function(x, y, w, n = 1, m = 1) {
d <- data.frame(x=as.numeric(as.factor(x)), y=as.numeric(as.factor(y)), w=w)
nx <- length(unique(d$x))
ny <- length(unique(d$y))
C <- cbind(c(d$x, d$y + nx), seq_len(nrow(d)))
C <- cbind(C, 1)
res <- lp("max", d$w, dense.const = C, const.dir = rep("<=", nx+ny),
const.rhs = c(rep(n, nx), rep(m, ny)), all.bin=TRUE, use.rw=TRUE)
if (res$status != 0) warning("No solution found.")
which(res$solution > 0)
}
dequal <- function(x, y) {
if (is.factor(x)) x <- as.character(x)
if (is.factor(y)) x <- as.character(y)
1*(x == y)
}
dmatch <- function(x, y, distance = dequal, threshold = 0.0) {
d <- expand.grid(x=seq_along(x), y=seq_along(y))
d$xval <- x[d$x]
d$yval <- y[d$y]
d$w <- distance(d$xval, d$yval)
d <- d[d$w > threshold, ]
res <- pairs_select(d$x, d$y, d$w, n=1, m=1)
res <- d[res, ]
res$y[match(seq_along(x), res$x)]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment