Ejemplo n.º 1
0
int AnovaTest::anovaresi(gsl_matrix *bY, const unsigned int i)
{
    unsigned int hid=i, aid = i-1;

    // count the right-hand tails
    calcSS(bY, &(Hats[aid]), mmRef);
    calcSS(bY, &(Hats[hid]), mmRef);
    testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, &(bMultStat), bStatj);

    // count data related to P-values
    if (bMultStat >= multstat[aid]) Pmultstat[aid]++;
    // get result ptr corresponds to model i
    double *sj = gsl_matrix_ptr (statj, aid, 0);
    double *pj = gsl_matrix_ptr (Pstatj, aid, 0);
    double *bj = gsl_vector_ptr (bStatj, 0);
    calcAdjustP(mmRef->punit, nVars, bj, sj, pj, sortid[aid]);    
       
   return 0;
}
Ejemplo n.º 2
0
int AnovaTest::anovacase(gsl_matrix *bY, gsl_matrix *bX)
{
   unsigned int j;
   // if Y col is all zeros
   for ( j=0; j<nVars; j++ ){
       gsl_vector_view colj = gsl_matrix_column(bY, j);
       if ( gsl_vector_isnull(&colj.vector) == TRUE ) return GSL_ERANGE;
   }

   unsigned int i, hid, aid;
   double *sj, *pj, *bj;
   gsl_matrix *Z = gsl_matrix_alloc(nRows, nVars);
   gsl_matrix_memcpy(Z, bY);
   // Hats.X 
   for (i=0; i<nModels-1; i++){
      hid = i+1; aid = i;  
      gsl_vector_view ref1 = gsl_matrix_row(inRef, aid);
      subX(bX, &ref1.vector, Hats[aid].X);
      gsl_vector_view ref0 = gsl_matrix_row(inRef, hid);
      subX(bX, &ref0.vector, Hats[hid].X);
      //Y = X*coef
      gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,-1.0,Hats[aid].X,Hats[aid].Coef,0.0,Z); 
      //Z = bY - Yhat;
      gsl_matrix_add (Z, bY);
      // calc teststats
      calcSS(Z, &(Hats[hid]), mmRef);
      calcSS(Z, &(Hats[aid]), mmRef);
      testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, &(bMultStat), bStatj);

      if (bMultStat >= multstat[i]) Pmultstat[i]++;
      sj = gsl_matrix_ptr (statj, i, 0);
      pj = gsl_matrix_ptr (Pstatj, i, 0);
      bj = gsl_vector_ptr (bStatj, 0);          
      calcAdjustP(mmRef->punit, nVars, bj, sj, pj, sortid[i]);
   }

  gsl_matrix_free(Z);

  return 0;
}
Ejemplo n.º 3
0
int GlmTest::summary(glm *fit)
{
    double lambda;
    unsigned int k;
    unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam;
    unsigned int mtype = fit->mmRef->model-1;
    PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef);
    BinGlm binNull(fit->mmRef), binAlt(fit->mmRef);
    NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef);
    glm *PtrNull[3] = { &pNull, &nbNull, &binNull };
    glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt };
    gsl_vector_view teststat, unitstat;
    gsl_matrix_view L1;
    // To estimate initial Beta from PtrNull->Beta    
//    gsl_vector *ref=gsl_vector_alloc(nParam);
//    gsl_matrix *BetaO=gsl_matrix_alloc(nParam, nVars);

    smryStat = gsl_matrix_alloc((nParam+1), nVars+1);
    Psmry = gsl_matrix_alloc((nParam+1), nVars+1);
    gsl_matrix_set_zero (Psmry);

    // initialize the design matrix for all hypo tests
    GrpMat *GrpXs = (GrpMat *)malloc((nParam+2)*sizeof(GrpMat));
    GrpXs[0].matrix = gsl_matrix_alloc(nRows, nParam);
    gsl_matrix_memcpy(GrpXs[0].matrix, fit->Xref); // the alt X
    GrpXs[1].matrix = gsl_matrix_alloc(nRows, 1); // overall test
    gsl_matrix_set_all (GrpXs[1].matrix, 1.0);
    for (k=2; k<nParam+2; k++) { // significance tests
       GrpXs[k].matrix = gsl_matrix_alloc(nRows, nParam-1);
       subX2(fit->Xref, k-2, GrpXs[k].matrix);
    }
    // Calc test statistics
    if ( tm->test == WALD ) {
        // the overall test compares to mean 
        teststat = gsl_matrix_row(smryStat, 0);
        L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam);
        lambda=gsl_vector_get(tm->smry_lambda, 0);
        GetR(fit->Res, tm->corr, lambda, Rlambda);
        GeeWald(fit, &L1.matrix, &teststat.vector);
        // the significance test 
        for (k=2; k<nParam+2; k++) {
            teststat = gsl_matrix_row(smryStat, k-1);
            L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam);            
            GeeWald(fit, &L1.matrix, &teststat.vector);
        }
    }
    else if (tm->test==SCORE) {
        for (k=1; k<nParam+2; k++) {
            teststat=gsl_matrix_row(smryStat, k-1);
            PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); 
            lambda=gsl_vector_get(tm->smry_lambda, k);
            GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
            GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector);
        }
    }
    else {
        for (k=1; k<nParam+2; k++) {
            teststat=gsl_matrix_row(smryStat, k-1);
            PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); 
            GeeLR(fit, PtrNull[mtype], &teststat.vector); // works better
        }
    }    

    // sort id if the unitvaraite test is free step-down
    gsl_permutation **sortid;
    sortid=(gsl_permutation **)malloc((nParam+1)*sizeof(gsl_permutation *));
    for ( k=0; k<(nParam+1); k++ ) {
        teststat = gsl_matrix_row (smryStat, k);
        unitstat = gsl_vector_subvector(&teststat.vector, 1, nVars);
        sortid[k] = gsl_permutation_alloc(nVars);
        gsl_sort_vector_index (sortid[k], &unitstat.vector);
        gsl_permutation_reverse(sortid[k]);  // rearrange in descending order
    }

    if (tm->resamp==MONTECARLO) {
       lambda=gsl_vector_get(tm->smry_lambda,0);
       GetR(fit->Res, tm->corr, lambda, Sigma);
       setMonteCarlo(fit, XBeta, Sigma);
    }

    nSamp=0;
    double *suj, *buj, *puj;
    gsl_matrix *bStat = gsl_matrix_alloc((nParam+1), nVars+1);
    gsl_matrix_set_zero (bStat);
    gsl_matrix *bY = gsl_matrix_alloc(nRows, nVars);
    gsl_matrix *bO = gsl_matrix_alloc(nRows, nVars);
    gsl_matrix_memcpy (bO, fit->Eta);
    double diff, timelast=0;
    clock_t clk_start=clock();

    for ( unsigned int i=0; i<tm->nboot; i++) {        
        if ( tm->resamp==CASEBOOT ) 
             resampSmryCase(fit,bY,GrpXs,bO,i);
        else resampNonCase(fit, bY, i);

        if ( tm->test == WALD ) {
            PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL);
            // the overall test compares to mean 
            teststat = gsl_matrix_row(bStat, 0);
            L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam);
            lambda=gsl_vector_get(tm->smry_lambda, 0);
            GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda);
            GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector);
            // the significance test 
            for (k=2; k<nParam+2; k++) {
               teststat = gsl_matrix_row(bStat, k-1);
               L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam);
               GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector);
            }
        }
        else if (tm->test==SCORE) {
            for (k=1; k<nParam+2; k++) {
               teststat=gsl_matrix_row(bStat, k-1);
               PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); 
               lambda=gsl_vector_get(tm->smry_lambda,k);
               GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
               GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector);
            }
        }
        else {  // use single bAlt estimate works better
            PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL);
            for (k=1; k<nParam+2; k++) {
               teststat=gsl_matrix_row(bStat, k-1);
               PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); 
               GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector);
            }
        }
        for (k=0; k<(nParam+1); k++) {
           buj = gsl_matrix_ptr (bStat, k, 0);
           suj = gsl_matrix_ptr (smryStat, k, 0);
           puj = gsl_matrix_ptr (Psmry, k, 0);
           if ( *buj >= *suj ) *puj=*puj+1;
           calcAdjustP(tm->punit, nVars, buj+1, suj+1, puj+1, sortid[k]);
        } // end for j loop
        nSamp++;
        // Prompts
        if ((tm->showtime==TRUE)&(i%100==0)) {
           diff=(float)(clock()-clk_start)/(float)CLOCKS_PER_SEC;
           timelast+=(double)diff/60;
           printf("\tResampling run %d finished. Time elapsed: %.2f min ...\n", i, timelast);
           clk_start=clock();
        }
    } // end for i loop

    // ========= Get P-values ========= //        
    if ( tm->punit == FREESTEP ) {
       for (k=0; k<(nParam+1); k++) {
           puj = gsl_matrix_ptr (Psmry, k, 1);
           reinforceP( puj, nVars, sortid[k] );
    }  }
    // p = (#exceeding observed stat + 1)/(#nboot+1)
    gsl_matrix_add_constant (Psmry, 1.0);
    gsl_matrix_scale (Psmry, (double)1.0/(nSamp+1));

    for (k=0; k<nVars; k++) aic[k]=-fit->ll[k]+2*(nParam+1);

    // === release memory ==== //
    PtrAlt[mtype]->releaseGlm();
    if ( tm->test!=WALD ) 
        PtrNull[mtype]->releaseGlm();
    gsl_matrix_free(bStat);
    gsl_matrix_free(bY);
    gsl_matrix_free(bO);

    for (k=0; k<nParam+1; k++) 
       if (sortid[k]!=NULL) gsl_permutation_free(sortid[k]);
    free(sortid);

    if ( GrpXs != NULL ) {
       for ( unsigned int k=0; k<nParam+2; k++ ) 
           if ( GrpXs[k].matrix != NULL )
              gsl_matrix_free (GrpXs[k].matrix);
       free(GrpXs);
    }

    return SUCCESS;
}
Ejemplo n.º 4
0
int GlmTest::anova(glm *fit, gsl_matrix *isXvarIn) 
{
    // Assume the models have been already sorted (in R)
    Xin = isXvarIn;
    nModels = Xin->size1;
    double *rdf = new double [nModels];
    unsigned int nP, i, j, k;
    unsigned int ID0, ID1, nP0, nP1;
    unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam;
    unsigned int mtype = fit->mmRef->model-1;

    dfDiff = new unsigned int [nModels-1];
    anovaStat = gsl_matrix_alloc((nModels-1), nVars+1);
    Panova = gsl_matrix_alloc((nModels-1), nVars+1);
    gsl_vector *bStat = gsl_vector_alloc(nVars+1);
    gsl_matrix_set_zero (anovaStat);    
    gsl_matrix_set_zero (Panova);
    gsl_vector_set_zero (bStat);

    PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef);
    BinGlm binNull(fit->mmRef), binAlt(fit->mmRef);
    NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef);
    PoissonGlm pNullb(fit->mmRef), pAltb(fit->mmRef);
    BinGlm binNullb(fit->mmRef), binAltb(fit->mmRef);
    NBinGlm nbNullb(fit->mmRef), nbAltb(fit->mmRef);
    glm *PtrNull[3] = { &pNull, &nbNull, &binNull };
    glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt };
    glm *bNull[3] = { &pNullb, &nbNullb, &binNullb };
    glm *bAlt[3] = { &pAltb, &nbAltb, &binAltb };

    double *suj, *buj, *puj;
    gsl_vector_view teststat, unitstat,ref1, ref0; 
    gsl_matrix *X0=NULL, *X1=NULL, *L1=NULL, *tmp1=NULL, *BetaO=NULL;
    gsl_matrix *bO=NULL, *bY=gsl_matrix_alloc(nRows, nVars);
    bO = gsl_matrix_alloc(nRows, nVars);

    gsl_permutation *sortid=NULL;
    if (tm->punit==FREESTEP) sortid = gsl_permutation_alloc(nVars);

    // ======= Fit the (first) Alt model =========//
    for (i=0; i<nModels; i++) {
        nP = 0;
        for (k=0; k<nParam; k++) 
	     if (gsl_matrix_get(Xin,i,k)!=FALSE) nP++;   
        rdf[i] = nRows-nP;
    }

    for (i=1; i<nModels; i++) {       
        // ======= Fit the Null model =========//
        ID0 = i; ID1 = i-1;
        nP0 = nRows - (unsigned int)rdf[ID0];
        nP1 = nRows - (unsigned int)rdf[ID1];

        // Degrees of freedom
        dfDiff[i-1] = nP1 - nP0;

        ref1=gsl_matrix_row(Xin, ID1);
        ref0=gsl_matrix_row(Xin, ID0);
        X0 = gsl_matrix_alloc(nRows, nP0);
        subX(fit->Xref, &ref0.vector, X0);
        X1 = gsl_matrix_alloc(nRows, nP1);
        subX(fit->Xref, &ref1.vector, X1);

	// ======= Get multivariate test statistics =======//
        // Estimate shrinkage parametr only once under H1 
        // See "FW: Doubts R package "mvabund" (12/14/11)
        teststat = gsl_matrix_row(anovaStat, (i-1));
        PtrNull[mtype]->regression(fit->Yref, X0, fit->Oref, NULL); 
        if (tm->test == SCORE) {
           lambda = gsl_vector_get(tm->anova_lambda, ID0);
           GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
           GeeScore(X1, PtrNull[mtype], &teststat.vector);
        }
        else if (tm->test==WALD) {
           PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, NULL);
           L1 = gsl_matrix_alloc (nP1-nP0, nP1);
           tmp1 = gsl_matrix_alloc (nParam, nP1);
           subX(L, &ref1.vector, tmp1);
           subXrow1(tmp1, &ref0.vector, &ref1.vector, L1);
           lambda = gsl_vector_get(tm->anova_lambda, ID1);
           GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda);
           GeeWald(PtrAlt[mtype], L1, &teststat.vector);
        }
        else {              
           BetaO = gsl_matrix_alloc(nP1, nVars);
           addXrow2(PtrNull[mtype]->Beta, &ref1.vector, BetaO); 
           PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, BetaO);
           GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); 
        }

        if (tm->resamp==MONTECARLO) {
            lambda=gsl_vector_get(tm->anova_lambda,ID0);
            GetR(fit->Res, tm->corr, lambda, Sigma);
            setMonteCarlo (PtrNull[mtype], XBeta, Sigma);
        }

	// ======= Get univariate test statistics =======//
        if (tm->punit == FREESTEP) {  
            unitstat=gsl_vector_subvector(&teststat.vector,1,nVars);
            gsl_sort_vector_index (sortid, &unitstat.vector);
            gsl_permutation_reverse(sortid);        
        }

        // ======= Get resampling distribution under H0 ===== //
	nSamp=0;
        double dif, timelast=0;
        clock_t clk_start=clock();
        if (tm->showtime==TRUE)
           printf("Resampling begins for test %d.\n", i);
        for (j=0; j<tm->nboot; j++) {	
//            printf("simu %d :", j);
	    gsl_vector_set_zero (bStat);
	    if ( tm->resamp == CASEBOOT ) {
                resampAnovaCase(PtrAlt[mtype],bY,X1,bO,j);
                subX(X1, &ref0.vector, X0);
            } 
            else {
                resampNonCase(PtrNull[mtype], bY, j);
                gsl_matrix_memcpy(bO, fit->Oref);
            }

            if ( tm->test == WALD ) {
                bAlt[mtype]->regression(bY,X1,bO,NULL); 
                lambda = gsl_vector_get(tm->anova_lambda, ID1);
                GetR(bAlt[mtype]->Res, tm->corr, lambda, Rlambda);
                GeeWald(bAlt[mtype], L1, bStat);
            }
            else if ( tm->test == SCORE ) {
                bNull[mtype]->regression(bY,X0,bO,NULL); 
                lambda = gsl_vector_get(tm->anova_lambda, ID0);
                GetR(bNull[mtype]->Res, tm->corr, lambda, Rlambda);
                GeeScore(X1, bNull[mtype], bStat);
            }
            else {
                bNull[mtype]->regression(bY,X0,bO,NULL); 
                addXrow2(bNull[mtype]->Beta, &ref1.vector, BetaO); 
                bAlt[mtype]->regression(bY,X1,bO,BetaO); 
                GeeLR(bAlt[mtype], bNull[mtype], bStat);                    
            }
            // ----- get multivariate counts ------- //   
           buj = gsl_vector_ptr (bStat,0);
           suj = gsl_matrix_ptr (anovaStat, i-1, 0);
           puj = gsl_matrix_ptr (Panova, i-1, 0);
           if ( *(buj) > (*(suj)-1e-8) ) *puj=*puj+1;
           // ------ get univariate counts ---------//            
           calcAdjustP(tm->punit,nVars,buj+1,suj+1,puj+1,sortid);
	   nSamp++;
           // Prompts
           if ((tm->showtime==TRUE)&(j%100==0)) {
              dif = (float)(clock() - clk_start)/(float)CLOCKS_PER_SEC;
              timelast+=(double)dif/60;
              printf("\tResampling run %d finished. Time elapsed: %.2f minutes...\n", j, timelast);
              clk_start=clock();
           }
        } // end j for loop

       // ========= get p-values ======== //
       if ( tm->punit == FREESTEP) {
          puj = gsl_matrix_ptr (Panova, i-1, 1);
          reinforceP(puj, nVars, sortid);
       }

       if (BetaO!=NULL) gsl_matrix_free(BetaO);
       if (X0!=NULL) gsl_matrix_free(X0);   
       if (X1!=NULL) gsl_matrix_free(X1);   
       if (tm->test == WALD) { 
          if (L1!=NULL) gsl_matrix_free(L1);
          if (tmp1!=NULL) gsl_matrix_free(tmp1);
       }
    } // end i for loop  and test for loop

    // p = (#exceeding observed stat + 1)/(#nboot+1)
    gsl_matrix_add_constant (Panova, 1.0);
    gsl_matrix_scale (Panova, (double)1/(nSamp+1.0));

    bAlt[mtype]->releaseGlm();
    PtrAlt[mtype]->releaseGlm();
    if ( tm->test!=WALD ) {
        bNull[mtype]->releaseGlm();
        PtrNull[mtype]->releaseGlm();
    }
    delete []rdf;
    if (sortid != NULL )
        gsl_permutation_free(sortid);
    gsl_vector_free(bStat);
    gsl_matrix_free(bY);   
    if (bO!=NULL) gsl_matrix_free(bO);   
    
    return SUCCESS;
}