Tmc                  package:BGcom                  R Documentation

_F_u_n_c_t_i_o_n _t_o _c_a_l_c_u_l_a_t_e _t_h_e _e_m_p_i_r_i_c_a_l _n_u_l_l _d_i_s_t_r_i_b_u_t_i_o_n _o_f _m_a_x(_T)

_D_e_s_c_r_i_p_t_i_o_n:

     This function use bootstrap for calculating the empirical
     distribution of max(T)  under the null hypothesis of independence
     between the two experiments. An empirical p-value is calculated to
     evaluate how the data  are far from the hypothesis of independence

_U_s_a_g_e:

     Tmc(repl, output.ratio, dir, data)

_A_r_g_u_m_e_n_t_s:

    data: The data matrix for the experiments to be compared

     dir: directory for storing the plots

    repl: Number of replicates to be performed

output.ratio: The output object from the ratio function

_D_e_t_a_i_l_s:

     This function uses bootstrap for calculating the empirical
     distribution of the maximum of T (i.e. T(q*)) under the null
     hypothesis of independence between the two experiments. The
     pvalues of one list are randomly permuted B times, while the ones
     for the other list are keeping fixed. In this way, any
     relationship between the two lists is destroyed. At each
     permutation b (b varies from 1 to B) a Tb(q) is calculated for
     each q and a maximum statistic Tb(q*) is returned; its
     distribution represents the null distribution under the condition
     of independence. The relative frequency of Tb(q*) larger than
     T(q*) is noted as empirical p value: it returns the proportion of
     Tb(q*) from permuted dataset greater than the observed one (so
     indicates where the observed T(q*) is located on the null
     distribution).

_V_a_l_u_e:

     Tmc:  Returns the empirical pvalue from testing T(q*).

_A_u_t_h_o_r(_s):

     M. Blangiardo

_R_e_f_e_r_e_n_c_e_s:

     Stone et al.(1988), Investigations of excess environmental risks
     around putative sources: statistical problems and a proposed
     test,Statistics in Medicine, 7, 649-660.

     M.Blangiardo and S.Richardson Statistical tools for synthesizing
     lists of differentially expressed features in related experiments,
     Genome Biology, 8, R54

_E_x_a_m_p_l_e_s:

     data = simulation(n=500,GammaA=1,GammaB=1,r1=0.5,r2=0.8,DEfirst=300,DEsecond=200,DEcommon=100)
     T<- ratio(data$Pval,interval=0.01,dir="D:/",name="CompData1Data2",pvalue=TRUE)
     bootstrap<- Tmc(data$Pval,repl=100,output.ratio=T,dir="D:/")

     ## The function is currently defined as
     function(repl,output.ratio,dir,data){
         if(output.ratio$pvalue==FALSE){
         data=1-data
         }

         lists = ncol(data)
         l=length(output.ratio$DECommon)
         Tmax = max(output.ratio$ratios,na.rm=TRUE)
         Tmax.null = rep(NA,repl)
         ratios.null = matrix(NA,l,repl)
         sample = matrix(NA,dim(data)[1],lists)
         
         sample[,1] <- data[,1]
         for(k in 1:repl){
         int = c()
         L=matrix(0,l,lists)
         data1 = matrix(NA,dim(data)[1],lists)
         data1[,1] <- data[,1]

             for(j in 2:lists){
                     sample[,j] = sample(data[,j])
                     data1[,j] = sample[,j]
                     }
         
         threshold = output.ratio$q
         for(i in 1:l){
                     temp = data1<=threshold[i]
                     for(j in 1:lists){
                     L[i,j] <- sum(temp[,j])
                     temp[temp[,j]==FALSE,j]<-0
                     temp[temp[,j]==TRUE,j]<-1
                     }
                     int[i] <- sum(apply(temp,1,sum)==lists) 
                             }


     expected = apply(L,1,prod)/(dim(data)[1])^(lists-1)
     observed = int
     ratios = matrix(0,l,1)

     for(i in 1:l){
         ratios[i,1] <- observed[i]/expected[i]
         }
     ratios.null[,k] <- ratios
     ratios <- ratios[threshold>0]
     Tmax.null[k] = max(ratios)
     }

     ID=seq(1,repl)
     p=length(ID[Tmax.null>=Tmax])
     pvalue<- p/repl

     postscript(paste("Pvalue","_",output.ratio$name,".ps"))
     hist(Tmax.null,main="",xlab="T",ylab="",xaxt="n",cex.main=0.9,xlim=c(min(Tmax.null),max(c(Tmax,max(Tmax.null)))),yaxt="n",cex.axis=0.9)
     axis(1,at = seq(min(Tmax.null),max(c(Tmax,max(Tmax.null))),5),labels = round(seq(min(Tmax.null),max(c(Tmax,max(Tmax.null))),5),0))
     legend(x=Tmax/2,y=dim(data)[1]/100,legend=paste("P value =",pvalue),bty="n",cex=0.9)
     abline(v=Tmax,lty=2)
     dev.off()
     return(pvalue=pvalue)
     }

