Last active
January 25, 2019 21:22
-
-
Save mmerce/0b1504075e5f5331ecc9b1677bf44bb4 to your computer and use it in GitHub Desktop.
Keeping one random row from each group of rows
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "Pick random row in group", | |
"description": "Selects one of the rows (at random) from the ones grouped by a list of fields", | |
"inputs": [ | |
{ | |
"name": "dataset-id", | |
"type": "dataset-id", | |
"description": "The original dataset id to be filtered" | |
}, | |
{ | |
"name": "fields-list", | |
"type": "list", | |
"description": "List of the fields to be used to group the rows." | |
} | |
], | |
"outputs": [ | |
{ | |
"name": "filtered-dataset", | |
"type": "dataset-id", | |
"description": "The dataset ID for the filtered rows." | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Pick one random row per group of rows.
;;
;; Inputs (declared in the script metadata):
;;   dataset-id  - id of the dataset to be filtered
;;   fields-list - list of field references used to group the rows
;; Output:
;;   filtered-dataset - id of the dataset created by the final filter
;;
;; Approach: build an intermediate dataset sorted by the grouping fields with
;; RAND() as the last sort key, then filter it so that a row is dropped when
;; every grouping field equals the value read with offset -1
;; (NOTE(review): offset -1 is presumably the previous row -- confirm against
;; the Flatline reference).

;; Field descriptors of the source dataset.
(define fields (resource-fields dataset-id))

;; Look up one entry of fields-list in the dataset's fields.
(define find-fields-fn (lambda (x) (find-field fields x)))

;; Extract the field id; yields a non-string when the lookup did not return
;; a map, so that the entry is dropped by the `filter string?` below.
(define filter-id-fn (lambda (x) (when (map? x) (x "id"))))

;; Ids of the grouping fields that could be resolved.
(define order-by-fields
  (filter string? (map filter-id-fn (map find-fields-fn fields-list))))

;; Fail early when no grouping field could be resolved.
(if (= (count order-by-fields) 0)
  (raise "Could not find a grouping field list")
  (log-info order-by-fields))

;; ORDER BY clause: every grouping field (back-quoted, qualified with the
;; table alias A) followed by RAND().
(define order-strs
  (append (map (lambda (x) (str "A.`" x "`")) order-by-fields) "RAND()"))
(define order-str (join ", " order-strs))

;; One Flatline equality test per grouping field, comparing the field's value
;; with the value at offset -1.
(define filter-str-fn (lambda (x) (flatline "(= (f {{x}}) (f {{x}} -1))")))
(define filter-strs (map filter-str-fn order-by-fields))
(define filter-str (join " " filter-strs))

;; Intermediate dataset: all the rows of the source dataset, sorted.
(define ordered-dataset
  (create-and-wait-dataset
    {"origin_datasets" [dataset-id]
     "sql_query" (flatline "select A.* from A order by {order-str}")
     "origin_dataset_names" (assoc {} dataset-id "A")}))

;; Final dataset: rows for which not all grouping fields match the row at
;; offset -1.
;; On failure the intermediate dataset is still removed, and the error is then
;; re-raised: swallowing it would bind `filtered-dataset` to the result of
;; `log-info`, which is not the dataset id the script declares as output.
(define filtered-dataset
  (try (create-and-wait-dataset
         {"origin_dataset" ordered-dataset
          "lisp_filter" (flatline "(not (and {filter-str}))")})
       (catch e
         (prog (log-info "Error: " e)
               (delete ordered-dataset)
               (raise e)))))

;; The sorted intermediate dataset is no longer needed.
(delete ordered-dataset)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment