Skip to content

Instantly share code, notes, and snippets.

@mmerce
Last active November 7, 2018 22:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmerce/d1c2937fbb6a1dd4443e8758a4fa7c48 to your computer and use it in GitHub Desktop.
Save mmerce/d1c2937fbb6a1dd4443e8758a4fa7c48 to your computer and use it in GitHub Desktop.
Batch centroids distances to all centroids
{
"name": "Batch centroid distances",
"description": "Creates a dataset that extends the input dataset with one column per centroid, holding each row's distance to that centroid",
"inputs": [
{
"name": "cluster-id",
"type": "cluster-id",
"description": "Cluster"
},
{
"name": "dataset-id",
"type": "dataset-id",
"description": "Dataset with the points to be measured"
}
],
"outputs": [
{
"name": "output-dataset",
"type": "dataset-id",
"description": "Dataset containing the distances to each centroid in the cluster."
}
]
}
;; Returns a map from field id to field descriptor, restricted to the
;; ids that appear both in the cluster's ["clusters" "fields"] map and
;; among the keys of its "scales" map. Ids present in "scales" but
;; absent from the fields map are skipped.
;; NOTE(review): presumably these are the effective (non-summary)
;; fields of the cluster -- confirm against the cluster resource docs.
(define (cluster-fields cluster)
  (let (fields (cluster ["clusters" "fields"] {})
        scaled-ids (filter (lambda (id) (contains? fields id))
                           (keys (cluster "scales"))))
    ;; Rebuild the map entry by entry, in the order given by "scales".
    (iterate (acc {} id scaled-ids)
      (assoc acc id (fields id)))))
;; Signals an error reporting that the input field `id` is missing.
;; The raised value is a map with a "message" entry naming the field
;; and a "code" entry of -1. Never returns normally.
(define (raise-missing id)
  (let (msg (str "Missing input field: " id))
    (raise {"message" msg "code" -1})))
;; Builds the Flatline expression that generates a new field holding
;; the distance from each dataset row to the point `instance`.
;;
;; cluster:  cluster resource map. The field ids returned by
;;           `cluster-fields` select the inputs, and the cluster's
;;           "scales" map supplies their weights (1 when an id has no
;;           scale entry).
;; instance: map from field id to value, e.g. a centroid's "center".
;;
;; Raises (through `raise-missing`) when `instance` has no value for
;; one of the selected field ids.
;;
;; `row-distance-squared` yields the squared distance, so the
;; expression is wrapped in `sqrt` to produce the distance that the
;; resulting column name and the script description promise.
(define (distance-flatline cluster instance)
  (let (ids (keys (cluster-fields cluster))
        ;; Point coordinates, in the same order as `ids`.
        ps (map (lambda (id) (or (instance id false) (raise-missing id))) ids)
        scales (cluster "scales" {})
        ;; Per-field weights, in the same order as `ids`.
        ws (map (lambda (id) (scales id 1)) ids))
    (flatline "(sqrt (row-distance-squared (list @{{ps}})"
              " (fields @{{ids}})"
              " (list @{{ws}})))")))
;; Creates (and waits for) a new dataset that extends the dataset
;; `ds-id` with one extra column, named "<centroid name> distance",
;; whose values are generated by the Flatline expression `fl`
;; (see `distance-flatline`).
;;
;; ds-id:   id of the dataset to extend (used as "origin_dataset").
;; cluster: cluster resource map. Not used by this procedure; kept so
;;          that existing callers keep working.
;; cent:    centroid map; its "id" must be present and its "name" is
;;          used to label the new column.
;; fl:      Flatline expression for the new column.
;;
;; Raises when `cent` has no "id". Otherwise returns the result of
;; `create-and-wait-dataset` for the extended dataset.
(define (generate-distance-dataset ds-id cluster cent fl)
  ;; Validate the centroid before any resource is created.
  (if (cent "id" false)
      (create-and-wait-dataset {"origin_dataset" ds-id
                                "refresh_objective" true
                                "new_fields" [{"name" (str (cent "name")
                                                           " distance")
                                               "field" fl}]})
      (raise (str "No id in " cent))))
;; Final workflow. Fetches the cluster and, for each of its centroids
;; in turn, creates a dataset that extends the previous one with a
;; column generated from that centroid's "center". The first step
;; starts from `dataset-id`.
;;
;; Returns the id of the last dataset created, or `dataset-id` itself
;; when the cluster has no centroids. One dataset is created per
;; centroid; the intermediate ones are not deleted here.
(define (batch-centroids cluster-id dataset-id)
  (let (cluster (fetch cluster-id)
        centroids (cluster ["clusters" "clusters"]))
    ;; Thread the most recent dataset id through the loop: each step
    ;; consumes it as origin and yields the id of the extended dataset.
    (iterate (current-ds dataset-id centroid centroids)
      (generate-distance-dataset current-ds
                                 cluster
                                 centroid
                                 (distance-flatline cluster
                                                    (centroid "center"))))))
;; Script output: the dataset produced by running the workflow on the
;; script inputs `cluster-id` and `dataset-id`.
(define output-dataset (batch-centroids cluster-id dataset-id))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment