#Created by Marta Blangiardo (m.blangiardo@imperial.ac.uk) ############################################################################### #R code from the paper "Statistical Tools for SYnthesizing Lists of Differentially Expressed Features in microarray experiments" ################################################################################## #The code is a collection of three functions for analysing two real dataset. The first (real.ratio) #calculates the T(q) statistic. #The second (real.bootstrap) performs the permutation test on the conditional model and assigns a #MC p-value to it. #The third (real.baymod) performs the joint Bayesian model and retuns the Credibility Intervals at 95% ################################################################################## ############################## #Calculate the ratio between observed and expected number of genes in common ############################## real.ratio<-function(n,dir,data1,data2,interval,name){ ID=seq(1,n) l=length(seq(0,1,interval)) threshold = cbind(rep(NA,l),seq(0,1,interval)) int=c() L1=c() L2=c() for(i in 1:l){ threshold[i,1] <- (length(ID[data1<=threshold[i,2] & data2<=threshold[i,2]])) int[i]<- threshold[i,1] } threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),seq(0,1,interval)) for(i in 1:l){ threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,4]])/n; threshold.expected[i,2]<- length(ID[data2<=threshold.expected[i,4]])/n; threshold.expected[i,3] <- threshold.expected[i,1]*threshold.expected[i,2]*n L1[i] <- threshold.expected[i,1]*n L2[i] <- threshold.expected[i,2]*n } #Calculate the ratio for the number of observed genes/number of expected ones expected = threshold.expected[,3] observed = threshold[,1] ratios = matrix(0,l,1) for(i in 1:l){ ratios[i,1] <- observed[i]/expected[i] } ratios=ratios[int>0,1] thresh.ratios=threshold[int>0,2] ps.options(paper="a4",horizontal=TRUE) setwd(dir) ps.options(horizontal=F) postscript(paste("Ratio","_",name,".ps")) plot(thresh.ratios,ratios,type="l", ylab= "T",xlab="Pvalue",main="",yaxt="n",cex.main=0.7,cex.axis=1.2,ylim=c(0,(max(ratios,na.rm=T)+sd(ratios,na.rm=T)))) axis(2, at = c(seq(0,max(ratios,na.rm=T)+sd(ratios,na.rm=T),0.5),max(ratios,na.rm=T)), labels = c(seq(0,max(ratios,na.rm=T)+sd(ratios,na.rm=T),0.5),round(max(ratios,na.rm=T),1)), tick = TRUE,cex.axis=1.2) m=max(ratios,na.rm=T) r=thresh.ratios[ratios==m] common = threshold[threshold[,2]==r,1] l1 = threshold.expected[threshold.expected[,4]==r,1]*n l2 = threshold.expected[threshold.expected[,4]==r,2]*n legend(x=0.3,y=m/6,legend= expression(paste(O["1+"],"(q*)","=")),bty="n") legend(x=0.3,y=m/6*2,legend= expression(paste(O["+1"],"(q*)","=")),bty="n") legend(x=0.3,y=m/6*4,legend= expression(paste(O[11],"(q*)","=")),bty="n") legend(x=0.3,y=m/6*5,legend= expression(paste("q*","=")),bty="n") legend(x=0.3,y=m/6*6,legend= expression(paste(T("q*"),"=")),bty="n") legend(x=0.45,y=m/6,legend= l1,bty="n") legend(x=0.45,y=m/6*2,legend= l2,bty="n") legend(x=0.45,y=m/6*4,legend= common,bty="n") legend(x=0.4,y=m/6*5,legend= r,bty="n") legend(x=0.4,y=m/6*6,legend= round(m,2),bty="n") dev.off() return(list(ratios=ratios,thresh.ratios=thresh.ratios,L1=L1,L2=L2,int=int,interval=interval,name=name)) } ######################################################## ######################## #Empirical distribution# ######################## real.bootstrap<-function(n,repl,output.ratio,dir,data1,data2){ l=sum(output.ratio$int>0) max = max(output.ratio$ratios,na.rm=TRUE) max.null = rep(NA,repl) ratios.null = matrix(NA,l,repl) for(r in 1:repl){ sample1 = sample(data2) ID=seq(1,n) threshold = cbind(rep(NA,l),seq((101-l)/100,1,output.ratio$interval)) for(i in 1:l){ threshold[i,1] <- (length(ID[data1<=threshold[i,2] & sample1<=threshold[i,2]])) } threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),seq((101-l)/100,1,output.ratio$interval)) for(i in 1:l){ threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,4]])/n; threshold.expected[i,2]<- length(ID[sample1<=threshold.expected[i,4]])/n; threshold.expected[i,3] <- threshold.expected[i,1]*threshold.expected[i,2]*n } expected = threshold.expected[,3] observed = threshold[,1] ratios = matrix(0,l,1) for(i in 1:l){ ratios[i,1] <- observed[i]/expected[i] } ratios.null[,r] <- ratios ratios <- ratios[threshold[,1]>0] max.null[r] = max(ratios) } ID=seq(1,repl) p=length(ID[max.null>=max]) pvalue<- p/repl postscript(paste("Pvalue","_",output.ratio$name,".ps")) hist(max.null,main="",xlab="T",ylab="",cex.main=0.9,xlim=c(min(max.null),max(max(max.null),max)),yaxt="n") legend(x=max/2,y=n/4,legend=paste("Pvalue =",pvalue),bty="n",cex=0.9) abline(v=max,lty=2) dev.off() return(pvalue=pvalue) } ############################################### #MC function for the analysis done in wb ############################################### real.baymod<-function(data1,data2,n,output.ratio,repl,dir){ #Calculate all the needed quantities ID=seq(1,n) x=sum(output.ratio$int>0) l=length(seq((101-x)/100,1,output.ratio$interval)) threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),rep(NA,l),seq((101-x)/100,1,output.ratio$interval)) O11=c() O12=c() O21=c() O22=c() for(i in 1:l){ threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,5] & data2<=threshold.expected[i,5]]) O11[i]<- threshold.expected[i,1] threshold.expected[i,2]<- length(ID[data1<=threshold.expected[i,5] & data2>threshold.expected[i,5]]) O12[i]<- threshold.expected[i,2] threshold.expected[i,3]<- length(ID[data1>threshold.expected[i,5] & data2<=threshold.expected[i,5]]) O21[i]<- threshold.expected[i,3] threshold.expected[i,4]<- length(ID[data1>threshold.expected[i,5] & data2>threshold.expected[i,5]]) O22[i]<- threshold.expected[i,4] } #Dirichlet prior prior.p=matrix(0.001,l,4) post.p=matrix(NA,l,4) p=array(NA,dim=c(l,4,repl)) p.s = array(NA,dim=c(l,4,repl)) p1=matrix(NA,l,repl) p2=matrix(NA,l,repl) p3=matrix(NA,l,repl) p4=matrix(NA,l,repl) ratio = matrix(NA,l,repl) ranks = matrix(NA,l,repl) ord=matrix(NA,l,repl) for(i in 1:l){ post.p[i,1] <- O11[i] + prior.p[i,1] post.p[i,2] <- O12[i] + prior.p[i,2] post.p[i,3] <- O21[i] + prior.p[i,3] post.p[i,4] <- O22[i] + prior.p[i,4] for(j in 1:repl){ for(k in 1:4){ p.s[i,k,j] <- rgamma(1,post.p[i,k],1) } } for(j in 1:repl){ for(k in 1:4){ p[i,k,j] <- p.s[i,k,j]/(p.s[i,1,j]+p.s[i,2,j]+p.s[i,3,j]+p.s[i,4,j]) } } for(j in 1:repl){ p1[i,j] <- p[i,1,j] p2[i,j] <- p[i,1,j]+p[i,2,j] p3[i,j] <- p[i,1,j]+p[i,3,j] ratio[i,j] <- p1[i,j] / (p2[i,j]*p3[i,j]) } } #maxs = c() #qstar = c() #for(j in 1:repl){ #maxs[j] <- max(ratio[,j]); #qstar[j] <- threshold.expected[ratio[,j]==maxs[j],5] #} #CI for ratio quantile = matrix(NA,l,3) for(i in 1:l){ quantile[i,1] <- quantile(ratio[i,],0.025,na.rm=T) quantile[i,2] <- quantile(ratio[i,],0.5,na.rm=T) quantile[i,3] <- quantile(ratio[i,],0.975,na.rm=T) } lim1<-matrix(0,l,2) for (i in 1:l) { lim1[i,1]<-quantile[i,1] lim1[i,2]<-quantile[i,3]} y1<-seq(1:l) y1<-matrix(y1,l,2) ps.options(horizontal=F) setwd(dir) postscript(paste("bayCI","_",output.ratio$name,".ps")) plot(y1,lim1,xlab="P value",ylab="R",main="CI",pch="_",axes=T,yaxt="n",xaxt="n", ylim=c(0,(max(quantile[1:l,3],na.rm=T)+1*sd(quantile[1:l,3],na.rm=T))), lwd=0.2) for (i in 1:l) lines(y1[i,],lim1[i,], lty=3,lwd=1.7) axis(2,at=seq(0,round((max(quantile[1:l,3],na.rm=T)+1*sd(quantile[1:l,3],na.rm=T)),0)),cex.axis=0.9) axis(1, at = seq(1,l,4), labels = seq(threshold.expected[1,5],threshold.expected[l,5],output.ratio$interval*4), tick = TRUE,cex.axis=0.9) abline(h=1,col="black", lwd=1.5) points(y1[,1],quantile[1:l,2],col="red",cex=0.5) dev.off() return(quantile=quantile) } ############################################################################