extractGenes              package:BGcom              R Documentation

_F_u_n_c_t_i_o_n _f_o_r _e_x_t_r_a_c_t_i_n_g _t_h_e _l_i_s_t_s _o_f _g_e_n_e_s _i_n _c_o_m_m_o_n

_D_e_s_c_r_i_p_t_i_o_n:

     Function that returns the lists of genes in common using the two
     suggested rules and additional ones defined by the user

_U_s_a_g_e:

     extractGenes(output.ratio,output.bay,gene.names,data,q=NULL)
             

_A_r_g_u_m_e_n_t_s:

output.ratio: The output object from the Frequentist model (ratio
          function)

output.bay: The output object from the Bayesian model (baymod function)

gene.names: ID of the genes (e.g Affy ID)

    data: Matrix of pvalues used for the analysis

       q: additional thresholds in the form of a vector to select a
          list of genes in common. If it is NULL only the Rmax and
          rule2 are used to select the lists of genes in common

_D_e_t_a_i_l_s:

     To select a list of interesting features from the Bayesian model
     we suggest two decision rules in the paper: 1. the maximum of
     Median(R(q)) only for the subset of credibility intervals which do
     not include 1 2. the largest threshold q for which the ratio R(q)
     il bigger than 2

     The first one is pointing out the strongest deviation from
     independence, whilst the second is the largest threshold where the
     number of genes called in common at least doubles the number of
     genes in common under independence. The user can always define an
     additional threshold q which he finds of interest and obtain the
     list of genes associated with it

_V_a_l_u_e:

     The function returns an object of the class list. Each element is
     a matrix where the first column has the names of the genes while
     the second/third columns have the Pvalues from the two
     experiments: 

     max: The list of genes in common selected on the basis of the
          threshold associated to Rmax

   rule2: The list of genes in common selected on the basis of the
          threshold associated to R2

 table.q: The list of genes in common selected on the basis of the
          additional thresholds selected by the user

_A_u_t_h_o_r(_s):

     Marta Blangiardo

_R_e_f_e_r_e_n_c_e_s:

     1. M.Blangiardo and S.Richardson Statistical tools for
     synthesizing lists of differentially expressed features in related
     experiments, Genome Biology, 8, R54

_E_x_a_m_p_l_e_s:

     data = simulation(n=500,GammaA=1,GammaB=1,r1=0.5,r2=0.8,DEfirst=300,DEsecond=200,DEcommon=100)
     T<- ratio(data$Pval,interval=0.01,dir="D:/",name="CompData1Data2",pvalue=TRUE)
     BayesianModel<- baymod(data$Pval,repl=100,output.ratio=T,dir="D:/")
     gene.names = data$names
     gene.lists <- extractGenes(output.ratio=T,output.bay=BayesianModel,gene.names,data$Pval,q=NULL)

     ## The function is currently defined as
     function(output.ratio,output.bay,gene.names,data,q=NULL){
     if(output.ratio$pvalue==FALSE){
     data = 1 - data
     }

     lists = dim(data)[2]
     #Decision rules:
     #1) Maximum for CI not including 1
     max.R = max(output.bay[round(round(output.bay[,1],2),1)>1,2])
     threshold.max = output.ratio$q[output.bay[,2]==max.R]

     #Table
     temp = matrix(0,dim(data)[1],lists)
     for(i in 1:dim(data)[1]){
                     for(j in 1:lists){
                             if(data[i,j]<= threshold.max){temp[i,j]<-1}
                     }
     }

     table.max <- data[apply(temp,1,sum)==lists,]
     names.max <- gene.names[apply(temp,1,sum)==lists]

     if(output.ratio$pvalue==FALSE){
     table.max <- data.frame(Names=names.max,RankingStat = 1- table.max)
     }

     if(output.ratio$pvalue==TRUE){
     table.max <- data.frame(Names=names.max,RankingStat = table.max)
     }

     if(length(output.ratio$q[output.bay[round(output.bay[,1],2)>1,2]>=2])>0){

     #2) Rule 2
     threshold.2 = max(output.ratio$q[round(round(output.bay[,2],2),3)>=2 & round(round(output.bay[,1],2),1)>1])

     #Table
     temp = matrix(0,dim(data)[1],lists)
     for(i in 1:dim(data)[1]){
                     for(j in 1:lists){
                             if(data[i,j]<= threshold.2){temp[i,j]<-1}
                     }
     }

     table.2 <- data[apply(temp,1,sum)==lists,]
     names.2 <- gene.names[apply(temp,1,sum)==lists]

     if(output.ratio$pvalue==FALSE){
     table.2 <- data.frame(Names=names.2,RankingStat = 1-table.2)
     }

     if(output.ratio$pvalue==TRUE){
     table.2 <- data.frame(Names=names.2,RankingStat = table.2)
             }

     if(is.null(q))
     {return(list(max = table.max,rule2 = table.2))}

     if(!is.null(q)){
     l = length(q)
     table.q = list()
             for(r in 1:l){

     temp = matrix(0,dim(data)[1],lists)
     for(i in 1:dim(data)[1]){
                     for(j in 1:lists){
                             if(data[i,j]<= q[r]){temp[i,j]<-1}
                                     }
                             }

     table.q[[r]] <- data[apply(temp,1,sum)==lists,]
     names.q <- gene.names[apply(temp,1,sum)==lists]
     if(output.ratio$pvalue==FALSE){
             table.q[[r]] <- data.frame(Names=names.q,RankingStat = 1-table.q[[r]])
             }

     if(output.ratio$pvalue==TRUE){
             table.q[[r]] <- data.frame(Names=names.q,RankingStat = table.q[[r]])
             }

     names(table.q)[[r]] <- paste("q=",q[r]) 
                             }
                     }
     return(list(max = table.max,rule2 = table.2, User = table.q))
             }

     if(length(output.ratio$q[output.bay[round(output.bay[,1],2)>1,2]>=2])==0){

     if(is.null(q))
     {return(list(max = table.max))}

     if(!is.null(q)){
     l = length(q)
     table.q = list()
             for(r in 1:l){

     temp = matrix(0,dim(data)[1],lists)
     for(i in 1:dim(data)[1]){
                     for(j in 1:lists){
                             if(data[i,j]<= q[r]){temp[i,j]<-1}
                                     }
                             }

     table.q[[r]] <- data[apply(temp,1,sum)==lists,]
     names.q <- gene.names[apply(temp,1,sum)==lists]
     if(output.ratio$pvalue==FALSE){
             table.q[[r]] <- data.frame(Names=names.q,RankingStat = 1-table.q[[r]])
             }

     if(output.ratio$pvalue==TRUE){
             table.q[[r]] <- data.frame(Names=names.q,RankingStat = table.q[[r]])
             }

     names(table.q)[[r]] <- paste("q=",q[r]) 
                                     }
                             }
     return(list(max = table.max,User = table.q))
             }

     }

