Last active
November 7, 2018 22:26
-
-
Save mmerce/d1c2937fbb6a1dd4443e8758a4fa7c48 to your computer and use it in GitHub Desktop.
Batch centroid distances to all centroids
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "Batch centroid distances", | |
"description": "It creates a dataset with the distances to all centroids in a cluster", | |
"inputs": [ | |
{ | |
"name": "cluster-id", | |
"type": "cluster-id", | |
"description": "Cluster" | |
}, | |
{ | |
"name": "dataset-id", | |
"type": "dataset-id", | |
"description": "Dataset with the points to be measured" | |
} | |
], | |
"outputs": [ | |
{ | |
"name": "output-dataset", | |
"type": "dataset-id", | |
"description": "Dataset containing the distances to each centroid in the cluster." | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Returns a map from field id to field description containing only the
;; effective (non-summary) fields of a cluster: the ids listed under the
;; cluster's "scales" entry that are also present in its
;; ["clusters" "fields"] map.
(define (cluster-fields cluster)
  (let (field-info (cluster ["clusters" "fields"] {})
        scaled-ids (keys (cluster "scales")))
    (reduce (lambda (acc fid)
              ;; Ids without a field description are skipped.
              (if (contains? field-info fid)
                (assoc acc fid (field-info fid))
                acc))
            {}
            scaled-ids)))
;; Error helper: signals, with code -1, that the input field `id` is
;; missing. Never returns normally.
(define (raise-missing id)
  (let (msg (str "Missing input field: " id))
    (raise {"message" msg
            "code" -1})))
;; Builds the flatline expression that computes, for every row, the
;; squared distance to `instance` (a map from field id to value).
;; Only the cluster's effective fields take part; each one is weighted
;; by its entry in the cluster's "scales" map (1 when absent).
;; Raises through `raise-missing` if `instance` has no value for one of
;; those fields.
(define (distance-flatline cluster instance)
  (let (ids (keys (cluster-fields cluster))
        scales (cluster "scales" {})
        ;; Value of the reference point for a field, or an error.
        value-of (lambda (id)
                   (or (instance id false) (raise-missing id)))
        ;; Scale applied to a field, defaulting to 1.
        weight-of (lambda (id) (scales id 1))
        ;; ps / ids / ws are referenced by name in the template below.
        ps (map value-of ids)
        ws (map weight-of ids))
    (flatline "(row-distance-squared (list @{{ps}})"
              " (fields @{{ids}})"
              " (list @{{ws}}))")))
;; Given a dataset and one centroid of a cluster, creates (and waits
;; for) a new dataset that extends `ds-id` with one extra column named
;; "<centroid name> distance", computed by the flatline expression `fl`
;; (as produced by `distance-flatline`).
;;
;; Parameters:
;;   ds-id   - id of the dataset to extend
;;   cluster - cluster resource; not used here, kept so existing callers
;;             keep working
;;   cent    - centroid map; must contain "id", and its "name" is used
;;             to label the new column
;;   fl      - flatline expression for the new column
;; Returns the id of the newly created dataset.
;; Raises {"message" ... "code" -1} when the centroid has no "id".
(define (generate-distance-dataset ds-id cluster cent fl)
  ;; The binding exists only to validate the centroid before any
  ;; resource is created.
  (let (id (or (cent "id" false)
               (raise {"message" (str "No id in " cent) "code" -1})))
    (create-and-wait-dataset {"origin_dataset" ds-id
                              "refresh_objective" true
                              "new_fields" [{"name" (str (cent "name")
                                                         " distance")
                                             "field" fl}]})))
;; Final workflow: fetches the cluster and walks through its centroids,
;; each time extending the most recently created dataset with the
;; distance column for that centroid. Returns the id of the last dataset
;; created, or `dataset-id` itself when the cluster has no centroids.
(define (batch-centroids cluster-id dataset-id)
  (let (cluster (fetch cluster-id)
        centroids (cluster ["clusters" "clusters"]))
    ;; The accumulator is the id of the dataset built so far.
    (iterate (current-ds dataset-id centroid centroids)
      (let (distance-expr (distance-flatline cluster (centroid "center")))
        (generate-distance-dataset current-ds
                                   cluster
                                   centroid
                                   distance-expr)))))
;; Script output: id of the dataset produced by running the workflow on
;; the `cluster-id` and `dataset-id` script inputs.
(define output-dataset
  (batch-centroids cluster-id dataset-id))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment