baymod                 package:BGcom                 R Documentation

_B_a_y_e_s_i_a_n _m_o_d_e_l _f_o_r _t_h_e _r_a_t_i_o _o_f _o_b_s_e_r_v_e_d _t_o _e_x_p_e_c_t_e_d _p_r_o_b_a_b_i_l_i_t_y _o_f _g_e_n_e_s _t_o _b_e _i_n _c_o_m_m_o_n

_D_e_s_c_r_i_p_t_i_o_n:

     This function specifies a Bayesian model for the ratio of observed
     to expected probability of genes to be in common. A multinomial
     distribution is specified on the 4 probabilities (DE in common, DE
     only in the first experiment, DE only in the second experiment,
     not DE) and we put a prior distribution on their parameters
     theta1, theta2, theta3, theta4. The quantity of interest now is
     the ratio of the probability that a gene is in common, to the
     probability that a gene is in common by chance (R)

_U_s_a_g_e:

     baymod(data, output.ratio, repl, dir)

_A_r_g_u_m_e_n_t_s:

    data: A matrix of the data from the experiments to be compared

output.ratio: The output object from the ratio function

    repl: Number of replicates to be performed

     dir: directory for storing the plots

_D_e_t_a_i_l_s:

     It returns an object of class list with the ratio R for each
     threshold and its quantiles (0.025,0.5,0.975). R(q) is significant
     if its CI does not include 1. We consider two rules fro selecting
     one lists: 1) qmax is the maximum of Median(R(q)) only for the
     subset of credibility intervals which do not include 1 2) q2 is
     the largest threshold where the number of genes called in common
     at least doubles the number of genes in common under independence
     (so where R(q) larger than 2)

     The function returns also a plot of the credibility interval for
     each threshold.

_V_a_l_u_e:

     A matrix with the 95% percentiles of R(q) for each p-value
     threshold

_A_u_t_h_o_r(_s):

     Marta Blangiardo

_R_e_f_e_r_e_n_c_e_s:

     M.Blangiardo and S.Richardson Statistical tools for synthesizing
     lists of differentially expressed features in related experiments,
     Genome Biology, 8, R54

_E_x_a_m_p_l_e_s:

     data = simulation(n=500,GammaA=1,GammaB=1,r1=0.5,r2=0.8,DEfirst=300,DEsecond=200,DEcommon=100)
     T<- ratio(data$Pval,interval=0.01,dir="D:/",name="CompData1Data2",pvalue=TRUE)
     BayesianModel<- baymod(data$Pval,repl=100,output.ratio=T,dir="D:/")

     ## The function is currently defined as
     function(data,output.ratio,repl,dir){
     if(output.ratio$pvalue==FALSE){
     data=1-data;
     }

     #Number of lists
     lists = dim(data)[2]

     #Calculate all the needed quantities 
     ID=seq(1,dim(data)[1])
     l=length(output.ratio$DECommon)

     #Design
     design = designMatrix(lists)
     row.names(design)<-seq(1,dim(design)[1])
     threshold = output.ratio$q
     O = matrix(0,l,(nrow(design)-1))

         for(i in 1:l){
         temp = matrix(0,dim(data)[1],lists)
                     for(j in 1:lists){
                             for(r in 1:dim(data)[1]){
                                     if(data[r,j]<=threshold[i]) {temp[r,j] = 1}
                                     }
                             }
     xx=designCount(temp,design)
     O[i,] <- xx
     }

     #Dirichlet prior
     prior.p=matrix(0.001,l,(nrow(design)-1))
     post.p=matrix(NA,l,(nrow(design)-1))

     p=array(NA,dim=c(l,(nrow(design)-1),repl))
     p.s = array(NA,dim=c(l,(nrow(design)-1),repl)) 
     marginal.p=array(NA,dim=c(l,lists,repl))
     ratio = matrix(NA,l,repl)

     for(i in 1:l){
             for(j in 1:(nrow(design)-1)){
         post.p[i,j] <- O[i,j] + prior.p[i,j]

         for(k in 1:repl){   
                 p.s[i,j,k] <- rgamma(1,post.p[i,j],1)}
                     }
             for(j in 1:(nrow(design)-1)){
                             for(k in 1:repl){   
                                     p[i,j,k] <- p.s[i,j,k]/dim(data)[1]
                                             }
                             }  

                     for(j in 1:lists){
                             for(k in 1:repl){
                             marginal.p[i,j,k] <- sum(p[i,(as.numeric(row.names(design[design[,j]==1,]))-1),k])
                                     }
                     }

                     for(k in 1:repl){
             ratio[i,k] <- p[i,(nrow(design)-1),k] / prod(marginal.p[i,,k])

                             }
                     }
     #########################################
     #CI for ratio
     quantile = matrix(NA,l,3)
     for(i in 1:l){
         quantile[i,1] <- quantile(ratio[i,],0.05,na.rm=TRUE)
         quantile[i,2] <- quantile(ratio[i,],0.5,na.rm=TRUE)
         quantile[i,3] <- quantile(ratio[i,],0.95,na.rm=TRUE)
     }

     lim1<-matrix(0,l,2)
     for (i in 1:l) {
     lim1[i,1]<-quantile[i,1]
     lim1[i,2]<-quantile[i,3]}
     y1<-seq(1:l)
     y1<-matrix(y1,l,2)
     Rmax = max(quantile[round(quantile[,1],2)>1,2])
     qmax = output.ratio$q[quantile[,2]==Rmax]

         if(length(output.ratio$q[quantile[round(quantile[,1],2)>1,2]>=2])>0){
             q2 = max(output.ratio$q[quantile[,2]>=2])
             R2 = quantile[output.ratio$q==q2,2]
          
             ps.options(horizontal=FALSE)
             setwd(dir)
             postscript(paste("bayCI","_",output.ratio$name,".ps"))
             plot(y1,lim1,xlab="P value",ylab="R",main="",pch="_",axes=TRUE,yaxt="n",xaxt="n",
             ylim=c(0,(max(quantile[1:l,3],na.rm=TRUE)+1*sd(quantile[1:l,3],na.rm=TRUE))),
             lwd=0.2)
             for (i in 1:l) lines(y1[i,],lim1[i,], lty=3,lwd=1.7)
             axis(2, at = c(0,0.5,1,1.5,R2,Rmax), labels = c(0,0.5,1,1.5,expression(R[2]),expression(R[max])), tick = TRUE,cex.axis=0.9)
             if(output.ratio$pvalue==TRUE){   
             axis(1, at = c((qmax*100),(q2*100),seq(((q2*100)+10),100,20)), labels = c(expression(q[max]),expression(q[2]),seq((q2+0.1),1,0.2)), tick=TRUE,cex=0.9)
             }
                     if(output.ratio$pvalue==FALSE){   
                     axis(1, at = c((qmax*100),(q2*100),seq(((q2*100)+10),100,20)), labels = c(expression(q[max]),expression(q[2]),1-seq((q2+0.1),1,0.2)), tick=TRUE,cex=0.9)
                     }
                     axis(4, at = c(1,R2,Rmax),labels = c(dim(data)[1],output.ratio$DECommon[output.ratio$q==q2],output.ratio$DECommon[output.ratio$q==qmax]),tick=TRUE,cex=0.9)
                     abline(h=1,col="black", lwd=1.5)
                     points(y1[,1],quantile[1:l,2],col="red",cex=0.5)
                     dev.off()
             }

         if(length(output.ratio$q[quantile[round(quantile[,1],2)>1,2]>=2])==0){

             ps.options(horizontal=FALSE)
             setwd(dir)
             postscript(paste("bayCI","_",output.ratio$name,".ps"))
             plot(y1,lim1,xlab="P value",ylab="R",main="",pch="_",axes=TRUE,yaxt="n",xaxt="n",
             ylim=c(0,(max(quantile[1:l,3],na.rm=TRUE)+1*sd(quantile[1:l,3],na.rm=TRUE))),
             lwd=0.2)
             for (i in 1:l) lines(y1[i,],lim1[i,], lty=3,lwd=1.7)
             axis(2, at = c(seq(0,(Rmax-0.5),0.5),Rmax), labels = c(seq(0,(Rmax-0.5),0.5),expression(R[max])), tick = TRUE,cex.axis=0.9)
             if(output.ratio$pvalue==TRUE){   
             axis(1, at = c((qmax*100),seq(((qmax*100)+10),100,20)), labels = c(expression(q[max]),seq((qmax+0.1),1,0.2)), tick=TRUE,cex=0.9)
             }
                     if(output.ratio$pvalue==FALSE){   
                     axis(1, at = c((qmax*100),seq(((qmax*100)+10),100,20)), labels = c(expression(q[max]),1-seq((qmax+0.1),1,0.2)), tick=TRUE,cex=0.9)
                     }
                     axis(4, at = c(1,Rmax),labels = c(dim(data)[1],output.ratio$DECommon[output.ratio$q==qmax]),tick=TRUE,cex=0.9)
                     abline(h=1,col="black", lwd=1.5)
                     points(y1[,1],quantile[1:l,2],col="red",cex=0.5)
                     dev.off()
             }

     return(quantile=quantile)
     }

