Example #1
0
static VALUE rb_gsl_permutation_reverse(VALUE obj)
{
  gsl_permutation *p = NULL;
  Data_Get_Struct(obj, gsl_permutation, p);
  gsl_permutation_reverse(p);
  return obj;
}
Example #2
0
/* Return indices of sorted pixels from greatest to smallest. */
static gsl_permutation *get_pixel_ranks(long npix, double *P)
{
    gsl_permutation *pix_perm = gsl_permutation_alloc(npix);
    if (pix_perm)
    {
        gsl_vector_view P_vector = gsl_vector_view_array(P, npix);
        gsl_sort_vector_index(pix_perm, &P_vector.vector);
        gsl_permutation_reverse(pix_perm);
    }
    return pix_perm;
}
Example #3
0
int GlmTest::summary(glm *fit)
{
    double lambda;
    unsigned int k;
    unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam;
    unsigned int mtype = fit->mmRef->model-1;
    PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef);
    BinGlm binNull(fit->mmRef), binAlt(fit->mmRef);
    NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef);
    glm *PtrNull[3] = { &pNull, &nbNull, &binNull };
    glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt };
    gsl_vector_view teststat, unitstat;
    gsl_matrix_view L1;
    // To estimate initial Beta from PtrNull->Beta    
//    gsl_vector *ref=gsl_vector_alloc(nParam);
//    gsl_matrix *BetaO=gsl_matrix_alloc(nParam, nVars);

    smryStat = gsl_matrix_alloc((nParam+1), nVars+1);
    Psmry = gsl_matrix_alloc((nParam+1), nVars+1);
    gsl_matrix_set_zero (Psmry);

    // initialize the design matrix for all hypo tests
    GrpMat *GrpXs = (GrpMat *)malloc((nParam+2)*sizeof(GrpMat));
    GrpXs[0].matrix = gsl_matrix_alloc(nRows, nParam);
    gsl_matrix_memcpy(GrpXs[0].matrix, fit->Xref); // the alt X
    GrpXs[1].matrix = gsl_matrix_alloc(nRows, 1); // overall test
    gsl_matrix_set_all (GrpXs[1].matrix, 1.0);
    for (k=2; k<nParam+2; k++) { // significance tests
       GrpXs[k].matrix = gsl_matrix_alloc(nRows, nParam-1);
       subX2(fit->Xref, k-2, GrpXs[k].matrix);
    }
    // Calc test statistics
    if ( tm->test == WALD ) {
        // the overall test compares to mean 
        teststat = gsl_matrix_row(smryStat, 0);
        L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam);
        lambda=gsl_vector_get(tm->smry_lambda, 0);
        GetR(fit->Res, tm->corr, lambda, Rlambda);
        GeeWald(fit, &L1.matrix, &teststat.vector);
        // the significance test 
        for (k=2; k<nParam+2; k++) {
            teststat = gsl_matrix_row(smryStat, k-1);
            L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam);            
            GeeWald(fit, &L1.matrix, &teststat.vector);
        }
    }
    else if (tm->test==SCORE) {
        for (k=1; k<nParam+2; k++) {
            teststat=gsl_matrix_row(smryStat, k-1);
            PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); 
            lambda=gsl_vector_get(tm->smry_lambda, k);
            GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
            GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector);
        }
    }
    else {
        for (k=1; k<nParam+2; k++) {
            teststat=gsl_matrix_row(smryStat, k-1);
            PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); 
            GeeLR(fit, PtrNull[mtype], &teststat.vector); // works better
        }
    }    

    // sort id if the unitvaraite test is free step-down
    gsl_permutation **sortid;
    sortid=(gsl_permutation **)malloc((nParam+1)*sizeof(gsl_permutation *));
    for ( k=0; k<(nParam+1); k++ ) {
        teststat = gsl_matrix_row (smryStat, k);
        unitstat = gsl_vector_subvector(&teststat.vector, 1, nVars);
        sortid[k] = gsl_permutation_alloc(nVars);
        gsl_sort_vector_index (sortid[k], &unitstat.vector);
        gsl_permutation_reverse(sortid[k]);  // rearrange in descending order
    }

    if (tm->resamp==MONTECARLO) {
       lambda=gsl_vector_get(tm->smry_lambda,0);
       GetR(fit->Res, tm->corr, lambda, Sigma);
       setMonteCarlo(fit, XBeta, Sigma);
    }

    nSamp=0;
    double *suj, *buj, *puj;
    gsl_matrix *bStat = gsl_matrix_alloc((nParam+1), nVars+1);
    gsl_matrix_set_zero (bStat);
    gsl_matrix *bY = gsl_matrix_alloc(nRows, nVars);
    gsl_matrix *bO = gsl_matrix_alloc(nRows, nVars);
    gsl_matrix_memcpy (bO, fit->Eta);
    double diff, timelast=0;
    clock_t clk_start=clock();

    for ( unsigned int i=0; i<tm->nboot; i++) {        
        if ( tm->resamp==CASEBOOT ) 
             resampSmryCase(fit,bY,GrpXs,bO,i);
        else resampNonCase(fit, bY, i);

        if ( tm->test == WALD ) {
            PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL);
            // the overall test compares to mean 
            teststat = gsl_matrix_row(bStat, 0);
            L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam);
            lambda=gsl_vector_get(tm->smry_lambda, 0);
            GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda);
            GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector);
            // the significance test 
            for (k=2; k<nParam+2; k++) {
               teststat = gsl_matrix_row(bStat, k-1);
               L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam);
               GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector);
            }
        }
        else if (tm->test==SCORE) {
            for (k=1; k<nParam+2; k++) {
               teststat=gsl_matrix_row(bStat, k-1);
               PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); 
               lambda=gsl_vector_get(tm->smry_lambda,k);
               GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
               GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector);
            }
        }
        else {  // use single bAlt estimate works better
            PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL);
            for (k=1; k<nParam+2; k++) {
               teststat=gsl_matrix_row(bStat, k-1);
               PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); 
               GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector);
            }
        }
        for (k=0; k<(nParam+1); k++) {
           buj = gsl_matrix_ptr (bStat, k, 0);
           suj = gsl_matrix_ptr (smryStat, k, 0);
           puj = gsl_matrix_ptr (Psmry, k, 0);
           if ( *buj >= *suj ) *puj=*puj+1;
           calcAdjustP(tm->punit, nVars, buj+1, suj+1, puj+1, sortid[k]);
        } // end for j loop
        nSamp++;
        // Prompts
        if ((tm->showtime==TRUE)&(i%100==0)) {
           diff=(float)(clock()-clk_start)/(float)CLOCKS_PER_SEC;
           timelast+=(double)diff/60;
           printf("\tResampling run %d finished. Time elapsed: %.2f min ...\n", i, timelast);
           clk_start=clock();
        }
    } // end for i loop

    // ========= Get P-values ========= //        
    if ( tm->punit == FREESTEP ) {
       for (k=0; k<(nParam+1); k++) {
           puj = gsl_matrix_ptr (Psmry, k, 1);
           reinforceP( puj, nVars, sortid[k] );
    }  }
    // p = (#exceeding observed stat + 1)/(#nboot+1)
    gsl_matrix_add_constant (Psmry, 1.0);
    gsl_matrix_scale (Psmry, (double)1.0/(nSamp+1));

    for (k=0; k<nVars; k++) aic[k]=-fit->ll[k]+2*(nParam+1);

    // === release memory ==== //
    PtrAlt[mtype]->releaseGlm();
    if ( tm->test!=WALD ) 
        PtrNull[mtype]->releaseGlm();
    gsl_matrix_free(bStat);
    gsl_matrix_free(bY);
    gsl_matrix_free(bO);

    for (k=0; k<nParam+1; k++) 
       if (sortid[k]!=NULL) gsl_permutation_free(sortid[k]);
    free(sortid);

    if ( GrpXs != NULL ) {
       for ( unsigned int k=0; k<nParam+2; k++ ) 
           if ( GrpXs[k].matrix != NULL )
              gsl_matrix_free (GrpXs[k].matrix);
       free(GrpXs);
    }

    return SUCCESS;
}
Example #4
0
int GlmTest::anova(glm *fit, gsl_matrix *isXvarIn) 
{
    // Assume the models have been already sorted (in R)
    Xin = isXvarIn;
    nModels = Xin->size1;
    double *rdf = new double [nModels];
    unsigned int nP, i, j, k;
    unsigned int ID0, ID1, nP0, nP1;
    unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam;
    unsigned int mtype = fit->mmRef->model-1;

    dfDiff = new unsigned int [nModels-1];
    anovaStat = gsl_matrix_alloc((nModels-1), nVars+1);
    Panova = gsl_matrix_alloc((nModels-1), nVars+1);
    gsl_vector *bStat = gsl_vector_alloc(nVars+1);
    gsl_matrix_set_zero (anovaStat);    
    gsl_matrix_set_zero (Panova);
    gsl_vector_set_zero (bStat);

    PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef);
    BinGlm binNull(fit->mmRef), binAlt(fit->mmRef);
    NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef);
    PoissonGlm pNullb(fit->mmRef), pAltb(fit->mmRef);
    BinGlm binNullb(fit->mmRef), binAltb(fit->mmRef);
    NBinGlm nbNullb(fit->mmRef), nbAltb(fit->mmRef);
    glm *PtrNull[3] = { &pNull, &nbNull, &binNull };
    glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt };
    glm *bNull[3] = { &pNullb, &nbNullb, &binNullb };
    glm *bAlt[3] = { &pAltb, &nbAltb, &binAltb };

    double *suj, *buj, *puj;
    gsl_vector_view teststat, unitstat,ref1, ref0; 
    gsl_matrix *X0=NULL, *X1=NULL, *L1=NULL, *tmp1=NULL, *BetaO=NULL;
    gsl_matrix *bO=NULL, *bY=gsl_matrix_alloc(nRows, nVars);
    bO = gsl_matrix_alloc(nRows, nVars);

    gsl_permutation *sortid=NULL;
    if (tm->punit==FREESTEP) sortid = gsl_permutation_alloc(nVars);

    // ======= Fit the (first) Alt model =========//
    for (i=0; i<nModels; i++) {
        nP = 0;
        for (k=0; k<nParam; k++) 
	     if (gsl_matrix_get(Xin,i,k)!=FALSE) nP++;   
        rdf[i] = nRows-nP;
    }

    for (i=1; i<nModels; i++) {       
        // ======= Fit the Null model =========//
        ID0 = i; ID1 = i-1;
        nP0 = nRows - (unsigned int)rdf[ID0];
        nP1 = nRows - (unsigned int)rdf[ID1];

        // Degrees of freedom
        dfDiff[i-1] = nP1 - nP0;

        ref1=gsl_matrix_row(Xin, ID1);
        ref0=gsl_matrix_row(Xin, ID0);
        X0 = gsl_matrix_alloc(nRows, nP0);
        subX(fit->Xref, &ref0.vector, X0);
        X1 = gsl_matrix_alloc(nRows, nP1);
        subX(fit->Xref, &ref1.vector, X1);

	// ======= Get multivariate test statistics =======//
        // Estimate shrinkage parametr only once under H1 
        // See "FW: Doubts R package "mvabund" (12/14/11)
        teststat = gsl_matrix_row(anovaStat, (i-1));
        PtrNull[mtype]->regression(fit->Yref, X0, fit->Oref, NULL); 
        if (tm->test == SCORE) {
           lambda = gsl_vector_get(tm->anova_lambda, ID0);
           GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda);
           GeeScore(X1, PtrNull[mtype], &teststat.vector);
        }
        else if (tm->test==WALD) {
           PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, NULL);
           L1 = gsl_matrix_alloc (nP1-nP0, nP1);
           tmp1 = gsl_matrix_alloc (nParam, nP1);
           subX(L, &ref1.vector, tmp1);
           subXrow1(tmp1, &ref0.vector, &ref1.vector, L1);
           lambda = gsl_vector_get(tm->anova_lambda, ID1);
           GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda);
           GeeWald(PtrAlt[mtype], L1, &teststat.vector);
        }
        else {              
           BetaO = gsl_matrix_alloc(nP1, nVars);
           addXrow2(PtrNull[mtype]->Beta, &ref1.vector, BetaO); 
           PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, BetaO);
           GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); 
        }

        if (tm->resamp==MONTECARLO) {
            lambda=gsl_vector_get(tm->anova_lambda,ID0);
            GetR(fit->Res, tm->corr, lambda, Sigma);
            setMonteCarlo (PtrNull[mtype], XBeta, Sigma);
        }

	// ======= Get univariate test statistics =======//
        if (tm->punit == FREESTEP) {  
            unitstat=gsl_vector_subvector(&teststat.vector,1,nVars);
            gsl_sort_vector_index (sortid, &unitstat.vector);
            gsl_permutation_reverse(sortid);        
        }

        // ======= Get resampling distribution under H0 ===== //
	nSamp=0;
        double dif, timelast=0;
        clock_t clk_start=clock();
        if (tm->showtime==TRUE)
           printf("Resampling begins for test %d.\n", i);
        for (j=0; j<tm->nboot; j++) {	
//            printf("simu %d :", j);
	    gsl_vector_set_zero (bStat);
	    if ( tm->resamp == CASEBOOT ) {
                resampAnovaCase(PtrAlt[mtype],bY,X1,bO,j);
                subX(X1, &ref0.vector, X0);
            } 
            else {
                resampNonCase(PtrNull[mtype], bY, j);
                gsl_matrix_memcpy(bO, fit->Oref);
            }

            if ( tm->test == WALD ) {
                bAlt[mtype]->regression(bY,X1,bO,NULL); 
                lambda = gsl_vector_get(tm->anova_lambda, ID1);
                GetR(bAlt[mtype]->Res, tm->corr, lambda, Rlambda);
                GeeWald(bAlt[mtype], L1, bStat);
            }
            else if ( tm->test == SCORE ) {
                bNull[mtype]->regression(bY,X0,bO,NULL); 
                lambda = gsl_vector_get(tm->anova_lambda, ID0);
                GetR(bNull[mtype]->Res, tm->corr, lambda, Rlambda);
                GeeScore(X1, bNull[mtype], bStat);
            }
            else {
                bNull[mtype]->regression(bY,X0,bO,NULL); 
                addXrow2(bNull[mtype]->Beta, &ref1.vector, BetaO); 
                bAlt[mtype]->regression(bY,X1,bO,BetaO); 
                GeeLR(bAlt[mtype], bNull[mtype], bStat);                    
            }
            // ----- get multivariate counts ------- //   
           buj = gsl_vector_ptr (bStat,0);
           suj = gsl_matrix_ptr (anovaStat, i-1, 0);
           puj = gsl_matrix_ptr (Panova, i-1, 0);
           if ( *(buj) > (*(suj)-1e-8) ) *puj=*puj+1;
           // ------ get univariate counts ---------//            
           calcAdjustP(tm->punit,nVars,buj+1,suj+1,puj+1,sortid);
	   nSamp++;
           // Prompts
           if ((tm->showtime==TRUE)&(j%100==0)) {
              dif = (float)(clock() - clk_start)/(float)CLOCKS_PER_SEC;
              timelast+=(double)dif/60;
              printf("\tResampling run %d finished. Time elapsed: %.2f minutes...\n", j, timelast);
              clk_start=clock();
           }
        } // end j for loop

       // ========= get p-values ======== //
       if ( tm->punit == FREESTEP) {
          puj = gsl_matrix_ptr (Panova, i-1, 1);
          reinforceP(puj, nVars, sortid);
       }

       if (BetaO!=NULL) gsl_matrix_free(BetaO);
       if (X0!=NULL) gsl_matrix_free(X0);   
       if (X1!=NULL) gsl_matrix_free(X1);   
       if (tm->test == WALD) { 
          if (L1!=NULL) gsl_matrix_free(L1);
          if (tmp1!=NULL) gsl_matrix_free(tmp1);
       }
    } // end i for loop  and test for loop

    // p = (#exceeding observed stat + 1)/(#nboot+1)
    gsl_matrix_add_constant (Panova, 1.0);
    gsl_matrix_scale (Panova, (double)1/(nSamp+1.0));

    bAlt[mtype]->releaseGlm();
    PtrAlt[mtype]->releaseGlm();
    if ( tm->test!=WALD ) {
        bNull[mtype]->releaseGlm();
        PtrNull[mtype]->releaseGlm();
    }
    delete []rdf;
    if (sortid != NULL )
        gsl_permutation_free(sortid);
    gsl_vector_free(bStat);
    gsl_matrix_free(bY);   
    if (bO!=NULL) gsl_matrix_free(bO);   
    
    return SUCCESS;
}
Example #5
0
AnovaTest::AnovaTest(mv_Method *mm, gsl_matrix *Y, gsl_matrix *X, gsl_matrix *isXvarIn):mmRef(mm), Yref(Y), Xref(X), inRef(isXvarIn)
{
    unsigned int hid, aid;
    unsigned int i, j, count;
    nModels=inRef->size1, nParam=Xref->size2;
    nRows=Yref->size1, nVars=Yref->size2; 

//  printf("initialize public variables: stats\n");
    multstat=(double *)malloc((nModels-1)*sizeof(double));
    Pmultstat = (double *)malloc((nModels-1)*sizeof(double));
    for (j=0; j<nModels-1; j++) *(Pmultstat+j)=0.0; 
    dfDiff = (unsigned int *)malloc((nModels-1)*sizeof(unsigned int));

    statj = gsl_matrix_alloc(nModels-1, nVars);
    Pstatj = gsl_matrix_alloc(nModels-1, nVars);
    gsl_matrix_set_zero(Pstatj);

    bStatj = gsl_vector_alloc(nVars);
    Hats = (mv_mat *)malloc(nModels*sizeof(mv_mat)); 
    sortid = (gsl_permutation **)malloc((nModels-1)*sizeof(gsl_permutation *));
    
    for (i=0; i<nModels; i++ ) {
        // Hats[i]
        Hats[i].mat=gsl_matrix_alloc(nRows, nRows);
        Hats[i].SS=gsl_matrix_alloc(nVars, nVars);
        Hats[i].R=gsl_matrix_alloc(nVars, nVars);
        Hats[i].Res=gsl_matrix_alloc(nRows, nVars);
        Hats[i].Y = gsl_matrix_alloc(nRows, nVars);
        Hats[i].sd = gsl_vector_alloc(nVars);
	count = 0;
	for (j=0; j<nParam; j++){
	    count+=(unsigned int)gsl_matrix_get(inRef, i, j);
	}
//	printf("count=%d \n", count);
	Hats[i].X = gsl_matrix_alloc(nRows, count);
	Hats[i].Coef=gsl_matrix_alloc(count, nVars);
        gsl_vector_view refi=gsl_matrix_row(inRef, i);
	subX(Xref, &refi.vector, Hats[i].X);
        calcSS(Yref, &(Hats[i]), mmRef);
//	displaymatrix(Hats[i].SS, "SS");
    }

    for (i=1; i<nModels; i++) {
        hid = i; aid = i-1;
        if ( mmRef->resamp != CASEBOOT ) {
            // fit = Y- resi 
            gsl_matrix_memcpy (Hats[i].Y, Yref);
            gsl_matrix_sub (Hats[i].Y, Hats[i].Res);
        } 
        gsl_vector_view statij = gsl_matrix_row(statj, aid);
        testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, (multstat+aid), &statij.vector); 
	dfDiff[aid] = Hats[aid].X->size2-Hats[hid].X->size2;
        // sortid
        sortid[aid] = gsl_permutation_alloc(nVars);
        gsl_sort_vector_index (sortid[aid], &statij.vector); 
        // rearrange sortid in descending order
        gsl_permutation_reverse (sortid[aid]);
    }  

    // initialize resampling indices 
//    getBootID(); done in R
    bootID = NULL;


    // Initialize GSL rnd environment variables
    const gsl_rng_type *T;
    gsl_rng_env_setup();
    T = gsl_rng_default;
    // an mt19937 generator with a seed of 0
    rnd = gsl_rng_alloc(T);
    if (mmRef->reprand!=TRUE){
       struct timeval tv;  // seed generation based on time
       gettimeofday(&tv, 0);
       unsigned long mySeed=tv.tv_sec + tv.tv_usec;
       gsl_rng_set(rnd, mySeed);  // reset seed
    }

//    printf("Anova test initialized.\n");

}