baymod {BGcom}R Documentation

Bayesian model for the ratio of observed to expected probability of genes to be in common

Description

This function specifies a Bayesian model for the ratio of observed to expected probability of genes to be in common. A multinomial distribution is specified on the 4 probabilities (DE in common, DE only in the first experiment, DE only in the second experiment, not DE) and we put a prior distribution on their parameters theta1, theta2, theta3, theta4. The quantity of interest now is the ratio of the probability that a gene is in common, to the probability that a gene is in common by chance (R)

Usage

baymod(data1, data2, n, output.ratio, repl, dir)

Arguments

data1 The vector of the data from the first experiment
data2 The vector of the data from the second experiment
n length of the vector of pvalue to be simulated
output.ratio The output object from the sim.ratio function
repl Number of replicates to be performed
dir directory for storing the plots

Details

It returns an object of class list with the ratio R for each threshold and its quantiles (0.025,0.5,0.975). R(q) is significant if its CI does not include 1. We consider two rules fro selecting one lists: 1) qmax is the maximum of Median(R(q)) only for the subset of credibility intervals which do not include 1 2) q2 is the largest threshold where the number of genes called in common at least doubles the number of genes in common under independence (so where R(q) larger than 2)

The function returns also a plot of the credibility interval for each threshold.

Value

A matrix with the 95% percentiles of R(q) for each p-value threshold

Author(s)

Marta Blangiardo

References

M.Blangiardo and S.Richardson Statistical tools for synthesizing lists of differentially expressed features in related experiments (under revision)

Examples

data = simulation(n=2000,GammaA=1,GammaB=1,r1=0.5,r2=0.8,DEfirst=1000,DEsecond=800,DEcommon=700)
T<- ratio(n=length(data$Pval1),data1=data$Pval1,data2=data$Pval2,interval=0.01,dir="D:/",name="CompData1Data2",pvalue=TRUE)
BayesianModel<- baymod(n=length(data$Pval1),data1=data$Pval1,data2=data$Pval2,repl=100,output.ratio=T,dir="D:/")

## The function is currently defined as
function(data1,data2,n,output.ratio,repl,dir){
#Calculate all the needed quantities 
ID=seq(1,n)
x=sum(output.ratio$int>0)
l=length(seq((101-x)/100,1,output.ratio$interval))
threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),rep(NA,l),seq((101-x)/100,1,output.ratio$interval))

O11=c()
O12=c()
O21=c()
O22=c()

    for(i in 1:l){
threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,5] & 
                data2<=threshold.expected[i,5]])
        O11[i]<- threshold.expected[i,1]

threshold.expected[i,2]<- length(ID[data1<=threshold.expected[i,5] & 
                data2>threshold.expected[i,5]])
        O12[i]<- threshold.expected[i,2]
        
threshold.expected[i,3]<- length(ID[data1>threshold.expected[i,5] & 
                data2<=threshold.expected[i,5]])
        O21[i]<- threshold.expected[i,3]

threshold.expected[i,4]<- length(ID[data1>threshold.expected[i,5] & 
                data2>threshold.expected[i,5]])
        O22[i]<- threshold.expected[i,4]
  }

#Dirichlet prior
prior.p=matrix(0.001,l,4)
post.p=matrix(NA,l,4)

p=array(NA,dim=c(l,4,repl))
p.s = array(NA,dim=c(l,4,repl)) 
p1=matrix(NA,l,repl)
p2=matrix(NA,l,repl)
p3=matrix(NA,l,repl)
p4=matrix(NA,l,repl)

ratio = matrix(NA,l,repl)

ord=matrix(NA,l,repl)

for(i in 1:l){
    post.p[i,1] <- O11[i] + prior.p[i,1]
    post.p[i,2] <- O12[i] + prior.p[i,2]
    post.p[i,3] <- O21[i] + prior.p[i,3]
    post.p[i,4] <- O22[i] + prior.p[i,4]

    for(j in 1:repl){   
        for(k in 1:4){
            p.s[i,k,j] <- rgamma(1,post.p[i,k],1)   
            }
        }

    for(j in 1:repl){   
        for(k in 1:4){
            p[i,k,j] <- p.s[i,k,j]/(p.s[i,1,j]+p.s[i,2,j]+p.s[i,3,j]+p.s[i,4,j])
            }
        }

    for(j in 1:repl){
        p1[i,j] <- p[i,1,j] 
        p2[i,j] <- p[i,1,j]+p[i,2,j]
        p3[i,j] <- p[i,1,j]+p[i,3,j]        
        
        ratio[i,j] <- p1[i,j] / (p2[i,j]*p3[i,j])
            }
    }

#CI for ratio
quantile = matrix(NA,l,3)
for(i in 1:l){
    quantile[i,1] <- quantile(ratio[i,],0.05,na.rm=TRUE)
    quantile[i,2] <- quantile(ratio[i,],0.5,na.rm=TRUE)
    quantile[i,3] <- quantile(ratio[i,],0.95,na.rm=TRUE)
  }

lim1<-matrix(0,l,2)
for (i in 1:l) {
lim1[i,1]<-quantile[i,1]
lim1[i,2]<-quantile[i,3]}
y1<-seq(1:l)
y1<-matrix(y1,l,2)

ps.options(horizontal=FALSE)
setwd(dir)
postscript(paste("bayCI","_",output.ratio$name,".ps"))
plot(y1,lim1,xlab="P value",ylab="R",main="CI",pch="_",axes=TRUE,yaxt="n",xaxt="n",
ylim=c(0,(max(quantile[1:l,3],na.rm=TRUE)+1*sd(quantile[1:l,3],na.rm=TRUE))),
lwd=0.2)
for (i in 1:l) lines(y1[i,],lim1[i,], lty=3,lwd=1.7)
axis(2,at=seq(0,round((max(quantile[1:l,3],na.rm=TRUE)+1*sd(quantile[1:l,3],na.rm=TRUE)),0)),cex.axis=0.9)
axis(1, at = seq(1,l,4), labels = seq(threshold.expected[1,5],threshold.expected[l,5],output.ratio$interval*4), tick = TRUE,cex.axis=0.9)
abline(h=1,col="black", lwd=1.5)
points(y1[,1],quantile[1:l,2],col="red",cex=0.5)
dev.off()

return(quantile=quantile)
  }

[Package BGcom version 1.0 Index]