Skip to content

Instantly share code, notes, and snippets.

@zachcp
Last active August 13, 2020 21:59
Show Gist options
  • Save zachcp/57e871dcb0869937b86c83a710cc496a to your computer and use it in GitHub Desktop.
Save zachcp/57e871dcb0869937b86c83a710cc496a to your computer and use it in GitHub Desktop.
NPAtlas Datalevin Test
wget https://www.npatlas.org/custom/versions/np_atlas_2020_06/NPAtlas_download.json
(require '[datalevin.core :as d])
(require '[clojure.data.json :as json])
(require '[medley.core :as medley])
(def np-schema
  "Schema map handed to `d/create-conn`: one entry per attribute kept from a
  flattened NPAtlas record, giving its value type (and uniqueness for :npaid)."
  {;; identifiers
   :id          {:db/valueType :db.type/long}
   :npaid       {:db/valueType :db.type/long
                 :db/unique    :db.unique/value}
   :name        {:db/valueType :db.type/string}

   ;; structure representations
   :smiles      {:db/valueType :db.type/string}
   :inchi       {:db/valueType :db.type/string}
   :inchikey    {:db/valueType :db.type/string}
   :mol_formula {:db/valueType :db.type/string}

   ;; masses
   :exact_mass  {:db/valueType :db.type/double}
   :m_plus_h    {:db/valueType :db.type/double}
   :m_plus_na   {:db/valueType :db.type/double}

   ;; source organism
   :genus       {:db/valueType :db.type/string}
   :species     {:db/valueType :db.type/string}

   ;; not loaded for now:
   ;:synthesis {:db/valueType :db.type/string
   ;            :db/cardinality :db.cardinality/many}

   ;; :external_ids is flattened into the following attributes
   :mibig-id    {:db/valueType :db.type/string}
   :gnps-id     {:db/valueType :db.type/string}})
(defn read-np-atlas
  "Read NPAtlas_download.json (path relative to the working directory) and
  return the parsed JSON with object keys converted to keywords."
  []
  (-> "NPAtlas_download.json"
      slurp
      (json/read-str :key-fn keyword)))
(defn remove-if-empty
  "Return `coll` without the key `ky` when the value stored under `ky` is
  empty (or nil); otherwise return `coll` unchanged.

  (remove-if-empty {:a 1 :ids []} :ids)  => {:a 1}
  (remove-if-empty {:a 1 :ids [7]} :ids) => {:a 1 :ids [7]}"
  [coll ky]
  ;; The docstring has to precede the arg vector; placed after it, the string
  ;; is just an expression that gets evaluated and thrown away.
  ;; Only one key is of interest, so test it directly instead of scanning
  ;; every entry of the map.
  (if (and (contains? coll ky)
           (empty? (get coll ky)))
    (dissoc coll ky)
    coll))
(defn- convert-external-id-map
  "Convert one external-id record into a single-entry map keyed by database.

  `m` must contain exactly the keys :external_db_name and :external_db_code.
  Returns {<keyword of \"<db-name>-id\"> <db-code>}, for example
  {:external_db_name \"mibig\" :external_db_code \"X1\"} => {:mibig-id \"X1\"}.

  Throws AssertionError (while *assert* is enabled) when `m` does not have
  that shape or when the resulting code is not a string."
  [m]
  ;; Conditions must be plain expressions. Wrapping them in #(...) produces
  ;; function objects, which are always truthy, so nothing was ever checked.
  {:pre  [(contains? m :external_db_name)
          (contains? m :external_db_code)
          (= 2 (count m))]
   ;; % is the returned map, so check its values; calling `val` on a map
   ;; (rather than on a map entry) would throw.
   :post [(every? string? (vals %))]}
  (let [db-name (get m :external_db_name)
        db-code (get m :external_db_code)] ; not named `val`: avoids shadowing clojure.core/val
    {(keyword (str db-name "-id")) db-code}))
(defn external-ids-to-map
  "Collapse a sequence of external-id records into one map.

  Each element of `coll` is passed through `convert-external-id-map` and the
  resulting single-entry maps are merged; on duplicate keys the later record
  wins. An empty or nil `coll` yields {}.

  [{:external_db_name \"mibig\" :external_db_code \"A\"}
   {:external_db_name \"gnps\"  :external_db_code \"B\"}]
  => {:mibig-id \"A\" :gnps-id \"B\"}"
  [coll]
  ;; The docstring has to precede the arg vector; placed after it, the string
  ;; is just a discarded expression and never reaches the var's metadata.
  (into {} (map convert-external-id-map) coll))
(defn flatten-external-ids
  "Replace the nested :external_ids collection of `m` with top-level entries
  produced by `external-ids-to-map`. When `m` has no truthy :external_ids
  value it is returned unchanged."
  [m]
  (let [ids (get m :external_ids)]
    (if ids
      (-> m
          (dissoc :external_ids)
          (merge (external-ids-to-map ids)))
      m)))
;; ---- Comment Blocks load data and then run queries.
(comment
;; REPL scratch session, part 1: store NPAtlas in a datalevin DB that is
;; created WITH `np-schema`, then try a few queries.
;; Forms inside `comment` are not evaluated on load; evaluate them one by one.
;; Check to see how datalevin performs using a schema
; load data (expects NPAtlas_download.json in the working directory; see the wget line at the top of the file)
(def full-np-atlas (read-np-atlas))
; create conn for datalevin, backed by the directory /tmp/datalevin-test
(def conn (d/create-conn np-schema "/tmp/datalevin-test"))
; Transact every entry on its own, each with the temp id -1.
; takes a while. Resulting DB size ~110MB
(doseq [entry full-np-atlas]
(let [mod (-> entry
; handle external_ids: drop the key when empty, otherwise flatten the
; records into top-level attributes
(remove-if-empty :external_ids)
(flatten-external-ids)
; earlier variant, which also discarded :external_ids:
;(dissoc :origin_organism :origin_reference :reassignments :external_ids :syntheses)
; drop the keys that are not loaded
(dissoc :origin_organism :origin_reference :reassignments :syntheses :node_id :cluster_id)
(assoc :db/id -1))]
(d/transact! conn [mod])))
; works fine: name, exact mass and entity id where exact_mass >= 200
(d/q '[:find ?name ?exact_mass ?e
:where
[?e :name ?name]
[?e :exact_mass ?exact_mass]
[(>= ?exact_mass 200) ]]
@conn)
; works fine: look up an entity by exact name
(d/q '[:find ?name ?e
:where
[?e :name ?name]
[(= ?name "Rapamycin")]]
@conn)
; broken: joining :name, :mibig-id and :smiles
; Execution error (ExceptionInfo) at datalevin.lmdb.LMDB/get_value (lmdb.clj:706).
; Fail to get-value: "Thaw failed against type-id: 78"
(d/q '[:find ?name ?bcg ?smiles ?e
:where
[?e :name ?name]
[?e :mibig-id ?bcg]
[?e :smiles ?smiles]
]
@conn)
; broken: the same error already appears with only :name and :smiles
; NOTE(review): both failing queries read :smiles, so that attribute looks like the trigger; confirm
; Execution error (ExceptionInfo) at datalevin.lmdb.LMDB/get_value (lmdb.clj:706).
; Fail to get-value: "Thaw failed against type-id: 78"
(d/q '[:find ?name ?smiles ?e
:where
[?e :name ?name]
[?e :smiles ?smiles]]
@conn)
)
(comment
  ;; REPL scratch session, part 2: the same load and queries as the
  ;; schema-backed run, but against a store created WITHOUT a schema.
  ;; Requires `full-np-atlas` to be def'd already, i.e. (read-np-atlas).
  (def conn2 (d/create-conn {} "/tmp/datalevin-test-noschema"))

  ;; Transact every entry on its own, each with the temp id -1.
  (doseq [entry full-np-atlas]
    (let [entity (-> entry ; not named `mod`: avoids shadowing clojure.core/mod
                     ;; handle external_ids: drop when empty, otherwise flatten
                     (remove-if-empty :external_ids)
                     (flatten-external-ids)
                     ;(dissoc :origin_organism :origin_reference :reassignments :external_ids :syntheses)
                     (dissoc :origin_organism :origin_reference :reassignments :syntheses :node_id :cluster_id)
                     (assoc :db/id -1))]
      (d/transact! conn2 [entity])))

  ; works fine
  (d/q '[:find ?name ?exact_mass ?e
         :where
         [?e :name ?name]
         [?e :exact_mass ?exact_mass]
         [(>= ?exact_mass 200)]]
       @conn2)

  ; works fine
  (d/q '[:find ?name ?e
         :where
         [?e :name ?name]
         [(= ?name "Rapamycin")]]
       @conn2)

  ; broken
  ; Execution error (ExceptionInfo) at datalevin.lmdb.LMDB/get_value (lmdb.clj:706).
  ; Fail to get-value: "Thaw failed against type-id: 16"
  (d/q '[:find ?name ?bcg ?smiles ?e
         :where
         [?e :name ?name]
         [?e :mibig-id ?bcg]
         [?e :smiles ?smiles]]
       @conn2)

  ; empty set?
  ; NOTE(review): this query used to target @conn (the schema-backed store),
  ; which looks like a copy-paste slip in a block that is about conn2. The
  ; "empty set?" observation may have been recorded against the wrong DB, so
  ; re-run it to confirm.
  (d/q '[:find ?name ?smiles ?e
         :where
         [?e :name ?name]
         [?e :smiles ?smiles]]
       @conn2)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment