Last updated: 2018-05-20

Code version: 488fcf2

library(mashr); library(miximash)

Loading required package: ashr

Loading required package: MCMCpack

Loading required package: coda

Loading required package: MASS

##
## Markov Chain Monte Carlo Package (MCMCpack)

## Copyright (C) 2003-2018 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park

##
## Support provided by the U.S. National Science Foundation

## (Grants SES-0350646 and SES-0350613)
##

source('../code/plotCormotif.R')

Simulation Design

In the simulation, \[c_{j2...R} = c_{j1}1 + \delta_{j}\] \[\hat{c}_{j}|c_{j} \sim N_{R}(c_{j}, \frac{1}{2}I)\] Let L be the contrast matrix. Therefore, \[\hat{\delta}_{j}|\delta_{j} = L\hat{c}_{j}|c_{j} \sim N_{R-1}(\delta_{j}, \frac{1}{2}LL')\]

We first generate the data:

# Fix the random seed so the simulated data set is reproducible.
set.seed(2018)
# Simulate 12000 samples in 8 conditions.
data = sim_contrast2(nsamp = 12000, ncond = 8)

This simulation routine creates a dataset with 8 conditions, and four different types of deviations \(\delta_{j}\): null, independent among conditions, condition-specific in condition 2, and shared (equal effects in all conditions). The data contains 10% non-null effects \[\delta_{j} \sim \frac{9}{10}N(0,0)+\frac{1}{30}N(0,I)+\frac{1}{30}N(0,11^{T})+\frac{1}{30}N(0,e_{1}e_{1}^{T})\]

Set up the contrast matrix and the mash contrast data object

# Contrast matrix comparing each of conditions 2..R with condition 1:
# row i has -1 in column 1 and +1 in column i + 1, so L %*% c = c[i + 1] - c[1].
R = 8
L = cbind(rep(-1, R-1), diag(R-1))
# Derive the row labels ('2-1', ..., 'R-1') from R so they stay in sync with it.
row.names(L) = paste0(2:R, '-1')

# Wrap the simulated estimates (Chat) and their standard errors (Shat) in a mash data object.
mash_data = mash_set_data(Bhat=data$Chat, Shat=data$Shat)
# Attach the contrast matrix L, so the analysis is of each condition's deviation from condition 1.
mash_data_L = mash_set_data_contrast(mash_data, L)

Mashcommonbaseline

Set up the covariance matrices:

# canonical covariance matrices
U.c = cov_canonical(mash_data_L)
# data driven: run the condition-by-condition analysis and pick out the
# samples that are significant at the 0.05 threshold
m.1by1 = mash_1by1(mash_data_L, alpha=0)
strong = get_significant_results(m.1by1,0.05)
# only 1 sample is strong, so the models below are fitted with the canonical covariances (U.c) only

Fit mashcontrast model

# Fit mash to the contrast data with the canonical covariances, using the R implementation of the algorithm.
mashcontrast.model = mash(mash_data_L, U.c, algorithm.version = 'R')

 - Computing 12000 x 181 likelihood matrix.
 - Likelihood calculations took 1.03 seconds.
 - Fitting model with 181 mixture components.
 - Model fitting took 2.37 seconds.
 - Computing posterior matrices.
 - Computation allocated took 0.73 seconds.

The log likelihood is

# Print the log likelihood of the fitted mashcontrast model to 10 significant digits.
print(get_loglik(mashcontrast.model),digits = 10)

[1] -105519.4856

Use get_significant_results to find the indices of effects that are “significant”:

# Count the effects called significant (threshold left at the function's default).
length(get_significant_results(mashcontrast.model))

[1] 68

The number of false positives is 1.

# Bar plot of the estimated mixture proportions of the mashcontrast model
# (las = 2 draws the labels perpendicular to the axis; cex.names shrinks them).
barplot(get_estimated_pi(mashcontrast.model),las = 2,cex.names = 0.7)

Mash

# Rebuild a plain mash data object from the contrast estimates and standard errors.
# No contrast matrix is supplied here, so mash is not told about the contrast structure.
Indep.data = mash_set_data(mash_data_L$Bhat, mash_data_L$Shat)
# Fit mash with the same canonical covariances (algorithm.version is left at its default here).
Indep.m = mash(Indep.data, U.c)

 - Computing 12000 x 181 likelihood matrix.
 - Likelihood calculations took 0.26 seconds.
 - Fitting model with 181 mixture components.
 - Model fitting took 2.61 seconds.
 - Computing posterior matrices.
 - Computation allocated took 0.07 seconds.

The log likelihood is

# Print the log likelihood of the fitted independent mash model to 10 significant digits.
print(get_loglik(Indep.m),digits = 10)

[1] -111502.5605

Use get_significant_results to find the indices of effects that are “significant”:

# Count the effects called significant (threshold left at the function's default).
length(get_significant_results(Indep.m))

[1] 3765

The number of false positives is 3295.

# Bar plot of the estimated mixture proportions of the independent mash model
# (las = 2 draws the labels perpendicular to the axis; cex.names shrinks them).
barplot(get_estimated_pi(Indep.m),las = 2,cex.names = 0.7)

Miximash

# Fit miximash to the contrast estimates and their standard errors for K = 2, ..., 6.
# mess = TRUE presumably enables the progress messages printed while fitting -- confirm against the miximash documentation.
fit = miximash(mash_data_L$Bhat, mash_data_L$Shat, mess = TRUE, K = 2:6)

[1] "We have run the first 5 iterations for K=3"
[1] "We have run the first 5 iterations for K=4"
[1] "We have run the first 5 iterations for K=5"
[1] "We have run the first 5 iterations for K=6"
[1] "We have run the first 10 iterations for K=6"
[1] "We have run the first 15 iterations for K=6"
[1] "We have run the first 20 iterations for K=6"

Loglikelihood

# Line plot of the second column of fit$loglike against K = 2, ..., 6.
plot(2:6, fit$loglike[,2],type = "l",xlab = "K",ylab = "loglike")

# Plot the motifs stored in the second element of fit$allmotif.
# NOTE(review): element 2 is used here, while the discoveries and posterior means
# later in this file use element 1 of fit$lfsr / fit$post_mean -- confirm they refer to the same fit.
plotMotif(fit$allmotif[[2]])

Discoveries:

# Number of samples whose lfsr is at or below 0.05 in at least one contrast.
sum(rowSums(fit$lfsr[[1]] <= 0.05) > 0)

[1] 235

Compare

The RRMSE plot:

# True deviations from condition 1: the contrast matrix applied to the true effects.
delta = data$C %*% t(L)

# Relative root mean squared error (RRMSE) of an estimate of delta: its mean
# squared error divided by that of the raw observed contrasts
# (data$Chat %*% t(L)), then square-rooted. The baseline MSE is identical for
# every method, so it is computed once instead of once per bar.
baseline.mse = mean((delta - data$Chat %*% t(L))^2)
rrmse = function(estimate) sqrt(mean((delta - estimate)^2) / baseline.mse)

# One bar per method; the order of the values matches names.arg.
barplot(c(rrmse(m.1by1$result$PosteriorMean),
          rrmse(mashcontrast.model$result$PosteriorMean),
          rrmse(Indep.m$result$PosteriorMean),
          rrmse(fit$post_mean[[1]])),
        ylim = c(0, 0.8),
        names.arg = c('ash', 'mashcommonbaseline', 'mashIndep', 'miximash'),
        ylab = 'RRMSE')

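We check the False Positive Rate and True Positive Rate. \[FPR = \frac{|N\cap S|}{|N|} \quad TPR = \frac{|CS\cap S|}{|T|} \] Here \(S\) is the set of effects called significant (lfsr at or below the threshold), \(N\) is the set of true null effects (\(\delta = 0\)), \(T\) is the set of true non-null effects (\(\delta \neq 0\)), and \(CS\) is the set of effects whose estimated sign is correct.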

Each effect is treated as a separate discovery in each condition

# True deviations from condition 1: the contrast matrix applied to the true effects.
delta = data$C %*% t(L)

# Product of the true deviation and each method's posterior mean: positive
# exactly where the true deviation is non-zero and the estimate has its sign.
sign.test.mash = as.matrix(delta)*mashcontrast.model$result$PosteriorMean
sign.test.Indep = as.matrix(delta)*Indep.m$result$PosteriorMean
sign.test.ash = as.matrix(delta)*m.1by1$result$PosteriorMean
sign.test.mix = as.matrix(delta)*fit$post_mean[[1]]

# lfsr thresholds at which the curves are evaluated: 0.0005, 0.0010, ..., 1.
thresh.seq = seq(0, 1, by=0.0005)[-1]

# The truth masks and their sizes do not depend on the threshold or the
# method, so they are computed once rather than on every iteration.
null.effect = delta == 0
n.null = sum(null.effect)
n.nonnull = sum(delta != 0)

# ROC points for one method, one row per threshold in thresh.seq.
#   sign.test: true deviation times posterior mean (same shape as delta)
#   lfsr:      the method's lfsr matrix (same shape as delta)
# Returns a length(thresh.seq) x 2 matrix with columns 'TPR' and 'FPR', where
#   TPR = (# correctly signed and lfsr <= threshold) / (# true non-null)
#   FPR = (# true null and lfsr <= threshold) / (# true null)
# Each (effect, condition) entry counts as a separate discovery.
roc_curve = function(sign.test, lfsr) {
  correct.sign = sign.test > 0
  rates = vapply(thresh.seq, function(thresh) {
    called = lfsr <= thresh
    c(TPR = sum(correct.sign & called) / n.nonnull,
      FPR = sum(null.effect & called) / n.null)
  }, c(TPR = 0, FPR = 0))
  # vapply returns one column per threshold; transpose to one row per threshold.
  t(rates)
}

mashcontrast = roc_curve(sign.test.mash, mashcontrast.model$result$lfsr)
Indep = roc_curve(sign.test.Indep, Indep.m$result$lfsr)
Ash = roc_curve(sign.test.ash, m.1by1$result$lfsr)
Mix = roc_curve(sign.test.mix, fit$lfsr[[1]])

{
  # ROC curves: true positive rate against false positive rate, one line per method.
  plot(x = mashcontrast[, 'FPR'], y = mashcontrast[, 'TPR'],
       type = 'l', col = 'red',
       xlab = "False Positive Rate", ylab = "True Positive Rate")
  # No colour is given for mashIndep, so it is drawn in the default line colour
  # (listed as 'black' in the legend).
  lines(x = Indep[, 'FPR'], y = Indep[, 'TPR'])
  lines(x = Ash[, 'FPR'], y = Ash[, 'TPR'], col = 'green')
  lines(x = Mix[, 'FPR'], y = Mix[, 'TPR'], col = 'cyan')
  legend('bottomright',
         legend = c('mashcommonbaseline', 'mashIndep', 'ash', 'miximash'),
         col = c('red', 'black', 'green', 'cyan'),
         lty = rep(1, 4))
}

Session information

# Record the R version, platform and package versions used for this analysis.
sessionInfo()

R version 3.4.4 (2018-03-15)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.4

Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] miximash_0.1-1 MCMCpack_1.4-2 MASS_7.3-50    coda_0.19-1   
[5] mashr_0.2-8    ashr_2.2-7    

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.16       compiler_3.4.4     git2r_0.21.0      
 [4] plyr_1.8.4         iterators_1.0.9    tools_3.4.4       
 [7] digest_0.6.15      evaluate_0.10.1    lattice_0.20-35   
[10] Matrix_1.2-14      foreach_1.4.4      yaml_2.1.19       
[13] parallel_3.4.4     mvtnorm_1.0-7      SparseM_1.77      
[16] stringr_1.3.0      knitr_1.20         MatrixModels_0.4-1
[19] REBayes_1.3        rprojroot_1.3-2    grid_3.4.4        
[22] rmarkdown_1.9      rmeta_3.0          magrittr_1.5      
[25] backports_1.1.2    codetools_0.2-15   htmltools_0.3.6   
[28] mcmc_0.9-5         assertthat_0.2.0   quantreg_5.35     
[31] stringi_1.2.2      Rmosek_8.0.69      pscl_1.5.2        
[34] doParallel_1.0.11  truncnorm_1.0-8    SQUAREM_2017.10-1

This R Markdown site was created with workflowr

Simulation with Signal - Compare miximash

Yuxin Zou

2018-05-19