#Created by Marta Blangiardo (m.blangiardo@imperial.ac.uk)
###############################################################################
#R code from the paper "Statistical Tools for SYnthesizing Lists of Differentially 
Expressed Features in microarray experiments"
##################################################################################
#The code is a collection of three functions for analysing two real dataset. The first (real.ratio) 
#calculates the T(q) statistic. 
#The second (real.bootstrap) performs the permutation test on the conditional model and assigns a 
#MC p-value to it.
#The third (real.baymod) performs the joint Bayesian model and retuns the Credibility Intervals at 95%
##################################################################################

##############################
#Calculate the ratio between observed and expected number of genes in common
##############################
real.ratio<-function(n,dir,data1,data2,interval,name){
ID=seq(1,n)
l=length(seq(0,1,interval))
threshold = cbind(rep(NA,l),seq(0,1,interval))
int=c()
L1=c()
L2=c()
    for(i in 1:l){
        threshold[i,1] <- (length(ID[data1<=threshold[i,2] & data2<=threshold[i,2]]))
        int[i]<- threshold[i,1]
        }
threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),seq(0,1,interval))
    for(i in 1:l){
        threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,4]])/n;
        threshold.expected[i,2]<- length(ID[data2<=threshold.expected[i,4]])/n;
        threshold.expected[i,3] <- threshold.expected[i,1]*threshold.expected[i,2]*n
        L1[i] <- threshold.expected[i,1]*n
        L2[i] <- threshold.expected[i,2]*n
        }

#Calculate the ratio for the number of observed genes/number of expected ones
expected = threshold.expected[,3]
observed = threshold[,1] 
ratios = matrix(0,l,1)

for(i in 1:l){
    ratios[i,1] <- observed[i]/expected[i]
    }

    ratios=ratios[int>0,1]
    thresh.ratios=threshold[int>0,2]
    ps.options(paper="a4",horizontal=TRUE)
    setwd(dir)
    ps.options(horizontal=F)
    postscript(paste("Ratio","_",name,".ps"))
    plot(thresh.ratios,ratios,type="l",
    ylab= "T",xlab="Pvalue",main="",yaxt="n",cex.main=0.7,cex.axis=1.2,ylim=c(0,(max(ratios,na.rm=T)+sd(ratios,na.rm=T))))
    axis(2, at = c(seq(0,max(ratios,na.rm=T)+sd(ratios,na.rm=T),0.5),max(ratios,na.rm=T)), labels = c(seq(0,max(ratios,na.rm=T)+sd(ratios,na.rm=T),0.5),round(max(ratios,na.rm=T),1)), tick = TRUE,cex.axis=1.2)
    m=max(ratios,na.rm=T)
    r=thresh.ratios[ratios==m]
    common = threshold[threshold[,2]==r,1]
    l1 = threshold.expected[threshold.expected[,4]==r,1]*n
    l2 =  threshold.expected[threshold.expected[,4]==r,2]*n
    legend(x=0.3,y=m/6,legend= expression(paste(O["1+"],"(q*)","=")),bty="n")
    legend(x=0.3,y=m/6*2,legend= expression(paste(O["+1"],"(q*)","=")),bty="n")
    legend(x=0.3,y=m/6*4,legend= expression(paste(O[11],"(q*)","=")),bty="n")
    legend(x=0.3,y=m/6*5,legend= expression(paste("q*","=")),bty="n")
    legend(x=0.3,y=m/6*6,legend= expression(paste(T("q*"),"=")),bty="n")
    legend(x=0.45,y=m/6,legend= l1,bty="n")
    legend(x=0.45,y=m/6*2,legend= l2,bty="n")
    legend(x=0.45,y=m/6*4,legend= common,bty="n")
    legend(x=0.4,y=m/6*5,legend= r,bty="n")
    legend(x=0.4,y=m/6*6,legend= round(m,2),bty="n")
dev.off() 
    return(list(ratios=ratios,thresh.ratios=thresh.ratios,L1=L1,L2=L2,int=int,interval=interval,name=name))
}

########################################################
########################
#Empirical distribution#
########################

real.bootstrap<-function(n,repl,output.ratio,dir,data1,data2){
    l=sum(output.ratio$int>0)
    max = max(output.ratio$ratios,na.rm=TRUE)
    max.null = rep(NA,repl)
    ratios.null = matrix(NA,l,repl)

for(r in 1:repl){
    sample1 = sample(data2)
    ID=seq(1,n)
    threshold = cbind(rep(NA,l),seq((101-l)/100,1,output.ratio$interval))

    for(i in 1:l){
        threshold[i,1] <- (length(ID[data1<=threshold[i,2] &    sample1<=threshold[i,2]]))
        }

    threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),seq((101-l)/100,1,output.ratio$interval))
    for(i in 1:l){
        threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,4]])/n;
        threshold.expected[i,2]<- length(ID[sample1<=threshold.expected[i,4]])/n;
        threshold.expected[i,3] <- threshold.expected[i,1]*threshold.expected[i,2]*n
        }

expected = threshold.expected[,3]
observed = threshold[,1] 
ratios = matrix(0,l,1)

for(i in 1:l){
    ratios[i,1] <- observed[i]/expected[i]
    }
ratios.null[,r] <- ratios
ratios <- ratios[threshold[,1]>0]
max.null[r] = max(ratios)
}


ID=seq(1,repl)
p=length(ID[max.null>=max])
pvalue<- p/repl


postscript(paste("Pvalue","_",output.ratio$name,".ps"))
hist(max.null,main="",xlab="T",ylab="",cex.main=0.9,xlim=c(min(max.null),max(max(max.null),max)),yaxt="n")
legend(x=max/2,y=n/4,legend=paste("Pvalue =",pvalue),bty="n",cex=0.9)
abline(v=max,lty=2)
dev.off()

return(pvalue=pvalue)
}

###############################################
#MC function for the analysis done in wb
###############################################
real.baymod<-function(data1,data2,n,output.ratio,repl,dir){
#Calculate all the needed quantities 
ID=seq(1,n)
x=sum(output.ratio$int>0)
l=length(seq((101-x)/100,1,output.ratio$interval))
threshold.expected = cbind(rep(NA,l),rep(NA,l),rep(NA,l),rep(NA,l),seq((101-x)/100,1,output.ratio$interval))

O11=c()
O12=c()
O21=c()
O22=c()

    for(i in 1:l){
threshold.expected[i,1] <- length(ID[data1<=threshold.expected[i,5] & 
                data2<=threshold.expected[i,5]])
        O11[i]<- threshold.expected[i,1]

threshold.expected[i,2]<- length(ID[data1<=threshold.expected[i,5] & 
                data2>threshold.expected[i,5]])
        O12[i]<- threshold.expected[i,2]
        
threshold.expected[i,3]<- length(ID[data1>threshold.expected[i,5] & 
                data2<=threshold.expected[i,5]])
        O21[i]<- threshold.expected[i,3]

threshold.expected[i,4]<- length(ID[data1>threshold.expected[i,5] & 
                data2>threshold.expected[i,5]])
        O22[i]<- threshold.expected[i,4]
}


#Dirichlet prior
prior.p=matrix(0.001,l,4)
post.p=matrix(NA,l,4)

p=array(NA,dim=c(l,4,repl))
p.s = array(NA,dim=c(l,4,repl)) 
p1=matrix(NA,l,repl)
p2=matrix(NA,l,repl)
p3=matrix(NA,l,repl)
p4=matrix(NA,l,repl)

ratio = matrix(NA,l,repl)

ranks = matrix(NA,l,repl)
ord=matrix(NA,l,repl)

for(i in 1:l){
    post.p[i,1] <- O11[i] + prior.p[i,1]
    post.p[i,2] <- O12[i] + prior.p[i,2]
    post.p[i,3] <- O21[i] + prior.p[i,3]
    post.p[i,4] <- O22[i] + prior.p[i,4]


    for(j in 1:repl){   
        for(k in 1:4){
            p.s[i,k,j] <- rgamma(1,post.p[i,k],1)   
            }
        }

    for(j in 1:repl){   
        for(k in 1:4){
            p[i,k,j] <- p.s[i,k,j]/(p.s[i,1,j]+p.s[i,2,j]+p.s[i,3,j]+p.s[i,4,j])
            }
        }

    for(j in 1:repl){
        p1[i,j] <- p[i,1,j] 
        p2[i,j] <- p[i,1,j]+p[i,2,j]
        p3[i,j] <- p[i,1,j]+p[i,3,j]        
        
        ratio[i,j] <- p1[i,j] / (p2[i,j]*p3[i,j])
            }
    }

#maxs = c()
#qstar = c()

#for(j in 1:repl){
#maxs[j] <- max(ratio[,j]);
#qstar[j] <- threshold.expected[ratio[,j]==maxs[j],5]
#}

#CI for ratio
quantile = matrix(NA,l,3)
for(i in 1:l){
    quantile[i,1] <- quantile(ratio[i,],0.025,na.rm=T)
    quantile[i,2] <- quantile(ratio[i,],0.5,na.rm=T)
    quantile[i,3] <- quantile(ratio[i,],0.975,na.rm=T)
}

lim1<-matrix(0,l,2)
for (i in 1:l) {
lim1[i,1]<-quantile[i,1]
lim1[i,2]<-quantile[i,3]}
y1<-seq(1:l)
y1<-matrix(y1,l,2)

ps.options(horizontal=F)
setwd(dir)
postscript(paste("bayCI","_",output.ratio$name,".ps"))
plot(y1,lim1,xlab="P value",ylab="R",main="CI",pch="_",axes=T,yaxt="n",xaxt="n",
ylim=c(0,(max(quantile[1:l,3],na.rm=T)+1*sd(quantile[1:l,3],na.rm=T))),
lwd=0.2)
for (i in 1:l) lines(y1[i,],lim1[i,], lty=3,lwd=1.7)
axis(2,at=seq(0,round((max(quantile[1:l,3],na.rm=T)+1*sd(quantile[1:l,3],na.rm=T)),0)),cex.axis=0.9)
axis(1, at = seq(1,l,4), labels = seq(threshold.expected[1,5],threshold.expected[l,5],output.ratio$interval*4), tick = TRUE,cex.axis=0.9)
abline(h=1,col="black", lwd=1.5)
points(y1[,1],quantile[1:l,2],col="red",cex=0.5)
dev.off()

return(quantile=quantile)
}
############################################################################