Skip to content

Instantly share code, notes, and snippets.

@zachcp
Created September 19, 2022 14:22
Show Gist options
  • Save zachcp/ac410b900d67264ffb8061d2b8fa45bf to your computer and use it in GitHub Desktop.
Save zachcp/ac410b900d67264ffb8061d2b8fa45bf to your computer and use it in GitHub Desktop.
Metabolomics benchmarks update
# install.packages("microbenchmark")
# install.packages("plyr")
#
# install.packages("rcdk")
# install.packages("OrgMassSpecR")
# install.packages("CHNOSZ")
#
# BiocManager::install("Rdisop")
# BiocManager::install("MetaboCoreUtils")
#
# BiocManager::install("ChemmineR")
# BiocManager::install("ChemmineOB")
#
# BiocManager::install("enviPat")
## These require loading the packages explicitly
library(plyr)
library(CHNOSZ)
library(enviPat)
library(MetaboCoreUtils)
library(rcdk)
library(ChemmineR)
#library(ChemmineOB)
library(OrgMassSpecR)
library(Rdisop)
data(isotopes)
# original
# https://github.com/CDK-R/cdkr/blob/master/rcdk/R/formula.R
# get.formula <- function(mf, charge=0) {
#
# manipulator <- get("mfManipulator", envir = .rcdk.GlobalEnv)
# if(!is.character(mf)) {
# stop("Must supply a Formula string");
# }else{
# dcob <- .cdkFormula.createChemObject()
# molecularformula <- .cdkFormula.createFormulaObject()
# molecularFormula <- .jcall(manipulator,
# "Lorg/openscience/cdk/interfaces/IMolecularFormula;",
# "getMolecularFormula",
# mf,
# .jcast(molecularformula,.IMolecularFormula),
# TRUE);
# }
#
# D <- new(J("java/lang/Integer"), as.integer(charge))
# .jcall(molecularFormula,"V","setCharge",D);
# object <- .cdkFormula.createObject(.jcast(molecularFormula,.IMolecularFormula));
# return(object);
# }
mfManipulator <- J("org/openscience/cdk/tools/manipulator/MolecularFormulaManipulator")
silentchemobject <- J("org.openscience.cdk.silent.SilentChemObjectBuilder")
#' Rewrite the formual object and directly access Java
#'
get.formula2 <- function(mf) {
formula <- mfManipulator$getMolecularFormula(
"C2H3",
silentchemobject$getInstance())
mfManipulator$getMass(formula)
}
#' Add type hints
#'
get.formula3 <- function(mf) {
builderinstance <- .jcall(silentchemobject,
"Lorg/openscience/cdk/interfaces/IChemObjectBuilder;",
"getInstance")
formula <- .jcall(mfManipulator,
"Lorg/openscience/cdk/interfaces/IMolecularFormula;",
"getMolecularFormula",
mf,
builderinstance);
mfManipulator$getMass(formula)
}
#' Add type hints
#'
get.formula4 <- function(mf) {
builderinstance <- .jcall(silentchemobject,
"Lorg/openscience/cdk/interfaces/IChemObjectBuilder;",
"getInstance")
formula <- .jcall(mfManipulator,
"Lorg/openscience/cdk/interfaces/IMolecularFormula;",
"getMolecularFormula",
mf,
builderinstance);
.jcall(mfManipulator,
"D",
"getMass",
formula)
}
benchmark <- microbenchmark::microbenchmark(
MetaboCoreUtils = MetaboCoreUtils::calculateMass("C2H6O"),
rcdk = rcdk::get.formula("C2H6O", charge = 0)@mass,
rcdk2 = get.formula2("C2H6O"),
rcdk3 = get.formula3("C2H6O"),
rcdk4 = get.formula4("C2H6O"),
Rdisop = Rdisop::getMolecule("C2H6O")$exactmass,
ChemmineR = ChemmineR::exactMassOB(ChemmineR::smiles2sdf("CCO")),
OrgMassSpecR = OrgMassSpecR::MonoisotopicMass(formula = OrgMassSpecR::ListFormula("C2H6O)"), charge = 0),
CHNOSZ = CHNOSZ::mass("C2H6O"),
enviPat = enviPat::isopattern(isotopes, "C2H6O", charge=FALSE, verbose=FALSE)[[1]][1,1]
, times=1000L)
masses <- c(
MetaboCoreUtils=MetaboCoreUtils::calculateMass("C2H6O"),
rcdk=rcdk::get.formula("C2H6O", charge = 0)@mass,
Rdisop=Rdisop::getMolecule("C2H6O")$exactmass,
#ChemmineR=ChemmineR::exactMassOB(ChemmineR::smiles2sdf("CCO")),
OrgMassSpecR=OrgMassSpecR::MonoisotopicMass(formula = OrgMassSpecR::ListFormula("C2H6O)"), charge = 0),
CHNOSZ=CHNOSZ::mass("C2H6O"),
enviPat=enviPat::isopattern(isotopes, "C2H6O", charge=FALSE, verbose=FALSE)[[1]][1,1]
)
options(digits=10)
t(t(sort(masses)))
summary(benchmark)[order(summary(benchmark)[,"median"]) , ]
clipr::write_clip(as.data.frame(summary(benchmark)[order(summary(benchmark)[,"median"]) , ] ))
expr min lq mean median uq
1 MetaboCoreUtils 69.479 122.8465 154.049427 139.6495 156.2700
10 enviPat 83.250 143.0935 170.429197 160.5360 179.6570
5 rcdk4 175.889 228.8605 324.182735 271.2955 327.7135
8 OrgMassSpecR 249.287 333.3135 392.479869 357.6665 401.5585
6 Rdisop 382.417 459.8790 538.068697 490.1505 557.9975
9 CHNOSZ 355.145 510.2910 588.186951 555.9165 632.2060
4 rcdk3 781.987 1004.7160 1294.507318 1133.3415 1339.4695
3 rcdk2 2078.465 2392.4950 2920.601088 2612.8025 2931.5465
7 ChemmineR 3227.320 3790.0455 4808.783873 4044.1410 4465.1000
2 rcdk 14823.815 16456.7715 19088.569430 17485.0800 19468.7195
@sneumann
Copy link

Cool, waaay faster. Should that become a PR for
https://github.com/CDK-R/cdkr/blob/7ebaa8d389f0987fcf261cb7fcaf4a5699b0cf50/rcdk/R/formula.R#L43
?
Yours, Steffen

@zachcp
Copy link
Author

zachcp commented Sep 19, 2022

@sneumann its not that simple. the formula object that is built might be used for other things. A straight substitution is not backwards compatible.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment