static VALUE rb_gsl_permutation_reverse(VALUE obj) { gsl_permutation *p = NULL; Data_Get_Struct(obj, gsl_permutation, p); gsl_permutation_reverse(p); return obj; }
/* Return indices of sorted pixels from greatest to smallest. */ static gsl_permutation *get_pixel_ranks(long npix, double *P) { gsl_permutation *pix_perm = gsl_permutation_alloc(npix); if (pix_perm) { gsl_vector_view P_vector = gsl_vector_view_array(P, npix); gsl_sort_vector_index(pix_perm, &P_vector.vector); gsl_permutation_reverse(pix_perm); } return pix_perm; }
int GlmTest::summary(glm *fit) { double lambda; unsigned int k; unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam; unsigned int mtype = fit->mmRef->model-1; PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef); BinGlm binNull(fit->mmRef), binAlt(fit->mmRef); NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef); glm *PtrNull[3] = { &pNull, &nbNull, &binNull }; glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt }; gsl_vector_view teststat, unitstat; gsl_matrix_view L1; // To estimate initial Beta from PtrNull->Beta // gsl_vector *ref=gsl_vector_alloc(nParam); // gsl_matrix *BetaO=gsl_matrix_alloc(nParam, nVars); smryStat = gsl_matrix_alloc((nParam+1), nVars+1); Psmry = gsl_matrix_alloc((nParam+1), nVars+1); gsl_matrix_set_zero (Psmry); // initialize the design matrix for all hypo tests GrpMat *GrpXs = (GrpMat *)malloc((nParam+2)*sizeof(GrpMat)); GrpXs[0].matrix = gsl_matrix_alloc(nRows, nParam); gsl_matrix_memcpy(GrpXs[0].matrix, fit->Xref); // the alt X GrpXs[1].matrix = gsl_matrix_alloc(nRows, 1); // overall test gsl_matrix_set_all (GrpXs[1].matrix, 1.0); for (k=2; k<nParam+2; k++) { // significance tests GrpXs[k].matrix = gsl_matrix_alloc(nRows, nParam-1); subX2(fit->Xref, k-2, GrpXs[k].matrix); } // Calc test statistics if ( tm->test == WALD ) { // the overall test compares to mean teststat = gsl_matrix_row(smryStat, 0); L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam); lambda=gsl_vector_get(tm->smry_lambda, 0); GetR(fit->Res, tm->corr, lambda, Rlambda); GeeWald(fit, &L1.matrix, &teststat.vector); // the significance test for (k=2; k<nParam+2; k++) { teststat = gsl_matrix_row(smryStat, k-1); L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam); GeeWald(fit, &L1.matrix, &teststat.vector); } } else if (tm->test==SCORE) { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(smryStat, k-1); PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); lambda=gsl_vector_get(tm->smry_lambda, k); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector); } } else { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(smryStat, k-1); PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); GeeLR(fit, PtrNull[mtype], &teststat.vector); // works better } } // sort id if the unitvaraite test is free step-down gsl_permutation **sortid; sortid=(gsl_permutation **)malloc((nParam+1)*sizeof(gsl_permutation *)); for ( k=0; k<(nParam+1); k++ ) { teststat = gsl_matrix_row (smryStat, k); unitstat = gsl_vector_subvector(&teststat.vector, 1, nVars); sortid[k] = gsl_permutation_alloc(nVars); gsl_sort_vector_index (sortid[k], &unitstat.vector); gsl_permutation_reverse(sortid[k]); // rearrange in descending order } if (tm->resamp==MONTECARLO) { lambda=gsl_vector_get(tm->smry_lambda,0); GetR(fit->Res, tm->corr, lambda, Sigma); setMonteCarlo(fit, XBeta, Sigma); } nSamp=0; double *suj, *buj, *puj; gsl_matrix *bStat = gsl_matrix_alloc((nParam+1), nVars+1); gsl_matrix_set_zero (bStat); gsl_matrix *bY = gsl_matrix_alloc(nRows, nVars); gsl_matrix *bO = gsl_matrix_alloc(nRows, nVars); gsl_matrix_memcpy (bO, fit->Eta); double diff, timelast=0; clock_t clk_start=clock(); for ( unsigned int i=0; i<tm->nboot; i++) { if ( tm->resamp==CASEBOOT ) resampSmryCase(fit,bY,GrpXs,bO,i); else resampNonCase(fit, bY, i); if ( tm->test == WALD ) { PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL); // the overall test compares to mean teststat = gsl_matrix_row(bStat, 0); L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam); lambda=gsl_vector_get(tm->smry_lambda, 0); GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector); // the significance test for (k=2; k<nParam+2; k++) { teststat = gsl_matrix_row(bStat, k-1); L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam); GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector); } } else if (tm->test==SCORE) { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(bStat, k-1); PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); lambda=gsl_vector_get(tm->smry_lambda,k); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector); } } else { // use single bAlt estimate works better PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL); for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(bStat, k-1); PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); } } for (k=0; k<(nParam+1); k++) { buj = gsl_matrix_ptr (bStat, k, 0); suj = gsl_matrix_ptr (smryStat, k, 0); puj = gsl_matrix_ptr (Psmry, k, 0); if ( *buj >= *suj ) *puj=*puj+1; calcAdjustP(tm->punit, nVars, buj+1, suj+1, puj+1, sortid[k]); } // end for j loop nSamp++; // Prompts if ((tm->showtime==TRUE)&(i%100==0)) { diff=(float)(clock()-clk_start)/(float)CLOCKS_PER_SEC; timelast+=(double)diff/60; printf("\tResampling run %d finished. Time elapsed: %.2f min ...\n", i, timelast); clk_start=clock(); } } // end for i loop // ========= Get P-values ========= // if ( tm->punit == FREESTEP ) { for (k=0; k<(nParam+1); k++) { puj = gsl_matrix_ptr (Psmry, k, 1); reinforceP( puj, nVars, sortid[k] ); } } // p = (#exceeding observed stat + 1)/(#nboot+1) gsl_matrix_add_constant (Psmry, 1.0); gsl_matrix_scale (Psmry, (double)1.0/(nSamp+1)); for (k=0; k<nVars; k++) aic[k]=-fit->ll[k]+2*(nParam+1); // === release memory ==== // PtrAlt[mtype]->releaseGlm(); if ( tm->test!=WALD ) PtrNull[mtype]->releaseGlm(); gsl_matrix_free(bStat); gsl_matrix_free(bY); gsl_matrix_free(bO); for (k=0; k<nParam+1; k++) if (sortid[k]!=NULL) gsl_permutation_free(sortid[k]); free(sortid); if ( GrpXs != NULL ) { for ( unsigned int k=0; k<nParam+2; k++ ) if ( GrpXs[k].matrix != NULL ) gsl_matrix_free (GrpXs[k].matrix); free(GrpXs); } return SUCCESS; }
int GlmTest::anova(glm *fit, gsl_matrix *isXvarIn) { // Assume the models have been already sorted (in R) Xin = isXvarIn; nModels = Xin->size1; double *rdf = new double [nModels]; unsigned int nP, i, j, k; unsigned int ID0, ID1, nP0, nP1; unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam; unsigned int mtype = fit->mmRef->model-1; dfDiff = new unsigned int [nModels-1]; anovaStat = gsl_matrix_alloc((nModels-1), nVars+1); Panova = gsl_matrix_alloc((nModels-1), nVars+1); gsl_vector *bStat = gsl_vector_alloc(nVars+1); gsl_matrix_set_zero (anovaStat); gsl_matrix_set_zero (Panova); gsl_vector_set_zero (bStat); PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef); BinGlm binNull(fit->mmRef), binAlt(fit->mmRef); NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef); PoissonGlm pNullb(fit->mmRef), pAltb(fit->mmRef); BinGlm binNullb(fit->mmRef), binAltb(fit->mmRef); NBinGlm nbNullb(fit->mmRef), nbAltb(fit->mmRef); glm *PtrNull[3] = { &pNull, &nbNull, &binNull }; glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt }; glm *bNull[3] = { &pNullb, &nbNullb, &binNullb }; glm *bAlt[3] = { &pAltb, &nbAltb, &binAltb }; double *suj, *buj, *puj; gsl_vector_view teststat, unitstat,ref1, ref0; gsl_matrix *X0=NULL, *X1=NULL, *L1=NULL, *tmp1=NULL, *BetaO=NULL; gsl_matrix *bO=NULL, *bY=gsl_matrix_alloc(nRows, nVars); bO = gsl_matrix_alloc(nRows, nVars); gsl_permutation *sortid=NULL; if (tm->punit==FREESTEP) sortid = gsl_permutation_alloc(nVars); // ======= Fit the (first) Alt model =========// for (i=0; i<nModels; i++) { nP = 0; for (k=0; k<nParam; k++) if (gsl_matrix_get(Xin,i,k)!=FALSE) nP++; rdf[i] = nRows-nP; } for (i=1; i<nModels; i++) { // ======= Fit the Null model =========// ID0 = i; ID1 = i-1; nP0 = nRows - (unsigned int)rdf[ID0]; nP1 = nRows - (unsigned int)rdf[ID1]; // Degrees of freedom dfDiff[i-1] = nP1 - nP0; ref1=gsl_matrix_row(Xin, ID1); ref0=gsl_matrix_row(Xin, ID0); X0 = gsl_matrix_alloc(nRows, nP0); subX(fit->Xref, &ref0.vector, X0); X1 = gsl_matrix_alloc(nRows, nP1); subX(fit->Xref, &ref1.vector, X1); // ======= Get multivariate test statistics =======// // Estimate shrinkage parametr only once under H1 // See "FW: Doubts R package "mvabund" (12/14/11) teststat = gsl_matrix_row(anovaStat, (i-1)); PtrNull[mtype]->regression(fit->Yref, X0, fit->Oref, NULL); if (tm->test == SCORE) { lambda = gsl_vector_get(tm->anova_lambda, ID0); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(X1, PtrNull[mtype], &teststat.vector); } else if (tm->test==WALD) { PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, NULL); L1 = gsl_matrix_alloc (nP1-nP0, nP1); tmp1 = gsl_matrix_alloc (nParam, nP1); subX(L, &ref1.vector, tmp1); subXrow1(tmp1, &ref0.vector, &ref1.vector, L1); lambda = gsl_vector_get(tm->anova_lambda, ID1); GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(PtrAlt[mtype], L1, &teststat.vector); } else { BetaO = gsl_matrix_alloc(nP1, nVars); addXrow2(PtrNull[mtype]->Beta, &ref1.vector, BetaO); PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, BetaO); GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); } if (tm->resamp==MONTECARLO) { lambda=gsl_vector_get(tm->anova_lambda,ID0); GetR(fit->Res, tm->corr, lambda, Sigma); setMonteCarlo (PtrNull[mtype], XBeta, Sigma); } // ======= Get univariate test statistics =======// if (tm->punit == FREESTEP) { unitstat=gsl_vector_subvector(&teststat.vector,1,nVars); gsl_sort_vector_index (sortid, &unitstat.vector); gsl_permutation_reverse(sortid); } // ======= Get resampling distribution under H0 ===== // nSamp=0; double dif, timelast=0; clock_t clk_start=clock(); if (tm->showtime==TRUE) printf("Resampling begins for test %d.\n", i); for (j=0; j<tm->nboot; j++) { // printf("simu %d :", j); gsl_vector_set_zero (bStat); if ( tm->resamp == CASEBOOT ) { resampAnovaCase(PtrAlt[mtype],bY,X1,bO,j); subX(X1, &ref0.vector, X0); } else { resampNonCase(PtrNull[mtype], bY, j); gsl_matrix_memcpy(bO, fit->Oref); } if ( tm->test == WALD ) { bAlt[mtype]->regression(bY,X1,bO,NULL); lambda = gsl_vector_get(tm->anova_lambda, ID1); GetR(bAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(bAlt[mtype], L1, bStat); } else if ( tm->test == SCORE ) { bNull[mtype]->regression(bY,X0,bO,NULL); lambda = gsl_vector_get(tm->anova_lambda, ID0); GetR(bNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(X1, bNull[mtype], bStat); } else { bNull[mtype]->regression(bY,X0,bO,NULL); addXrow2(bNull[mtype]->Beta, &ref1.vector, BetaO); bAlt[mtype]->regression(bY,X1,bO,BetaO); GeeLR(bAlt[mtype], bNull[mtype], bStat); } // ----- get multivariate counts ------- // buj = gsl_vector_ptr (bStat,0); suj = gsl_matrix_ptr (anovaStat, i-1, 0); puj = gsl_matrix_ptr (Panova, i-1, 0); if ( *(buj) > (*(suj)-1e-8) ) *puj=*puj+1; // ------ get univariate counts ---------// calcAdjustP(tm->punit,nVars,buj+1,suj+1,puj+1,sortid); nSamp++; // Prompts if ((tm->showtime==TRUE)&(j%100==0)) { dif = (float)(clock() - clk_start)/(float)CLOCKS_PER_SEC; timelast+=(double)dif/60; printf("\tResampling run %d finished. Time elapsed: %.2f minutes...\n", j, timelast); clk_start=clock(); } } // end j for loop // ========= get p-values ======== // if ( tm->punit == FREESTEP) { puj = gsl_matrix_ptr (Panova, i-1, 1); reinforceP(puj, nVars, sortid); } if (BetaO!=NULL) gsl_matrix_free(BetaO); if (X0!=NULL) gsl_matrix_free(X0); if (X1!=NULL) gsl_matrix_free(X1); if (tm->test == WALD) { if (L1!=NULL) gsl_matrix_free(L1); if (tmp1!=NULL) gsl_matrix_free(tmp1); } } // end i for loop and test for loop // p = (#exceeding observed stat + 1)/(#nboot+1) gsl_matrix_add_constant (Panova, 1.0); gsl_matrix_scale (Panova, (double)1/(nSamp+1.0)); bAlt[mtype]->releaseGlm(); PtrAlt[mtype]->releaseGlm(); if ( tm->test!=WALD ) { bNull[mtype]->releaseGlm(); PtrNull[mtype]->releaseGlm(); } delete []rdf; if (sortid != NULL ) gsl_permutation_free(sortid); gsl_vector_free(bStat); gsl_matrix_free(bY); if (bO!=NULL) gsl_matrix_free(bO); return SUCCESS; }
AnovaTest::AnovaTest(mv_Method *mm, gsl_matrix *Y, gsl_matrix *X, gsl_matrix *isXvarIn):mmRef(mm), Yref(Y), Xref(X), inRef(isXvarIn) { unsigned int hid, aid; unsigned int i, j, count; nModels=inRef->size1, nParam=Xref->size2; nRows=Yref->size1, nVars=Yref->size2; // printf("initialize public variables: stats\n"); multstat=(double *)malloc((nModels-1)*sizeof(double)); Pmultstat = (double *)malloc((nModels-1)*sizeof(double)); for (j=0; j<nModels-1; j++) *(Pmultstat+j)=0.0; dfDiff = (unsigned int *)malloc((nModels-1)*sizeof(unsigned int)); statj = gsl_matrix_alloc(nModels-1, nVars); Pstatj = gsl_matrix_alloc(nModels-1, nVars); gsl_matrix_set_zero(Pstatj); bStatj = gsl_vector_alloc(nVars); Hats = (mv_mat *)malloc(nModels*sizeof(mv_mat)); sortid = (gsl_permutation **)malloc((nModels-1)*sizeof(gsl_permutation *)); for (i=0; i<nModels; i++ ) { // Hats[i] Hats[i].mat=gsl_matrix_alloc(nRows, nRows); Hats[i].SS=gsl_matrix_alloc(nVars, nVars); Hats[i].R=gsl_matrix_alloc(nVars, nVars); Hats[i].Res=gsl_matrix_alloc(nRows, nVars); Hats[i].Y = gsl_matrix_alloc(nRows, nVars); Hats[i].sd = gsl_vector_alloc(nVars); count = 0; for (j=0; j<nParam; j++){ count+=(unsigned int)gsl_matrix_get(inRef, i, j); } // printf("count=%d \n", count); Hats[i].X = gsl_matrix_alloc(nRows, count); Hats[i].Coef=gsl_matrix_alloc(count, nVars); gsl_vector_view refi=gsl_matrix_row(inRef, i); subX(Xref, &refi.vector, Hats[i].X); calcSS(Yref, &(Hats[i]), mmRef); // displaymatrix(Hats[i].SS, "SS"); } for (i=1; i<nModels; i++) { hid = i; aid = i-1; if ( mmRef->resamp != CASEBOOT ) { // fit = Y- resi gsl_matrix_memcpy (Hats[i].Y, Yref); gsl_matrix_sub (Hats[i].Y, Hats[i].Res); } gsl_vector_view statij = gsl_matrix_row(statj, aid); testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, (multstat+aid), &statij.vector); dfDiff[aid] = Hats[aid].X->size2-Hats[hid].X->size2; // sortid sortid[aid] = gsl_permutation_alloc(nVars); gsl_sort_vector_index (sortid[aid], &statij.vector); // rearrange sortid in descending order gsl_permutation_reverse (sortid[aid]); } // initialize resampling indices // getBootID(); done in R bootID = NULL; // Initialize GSL rnd environment variables const gsl_rng_type *T; gsl_rng_env_setup(); T = gsl_rng_default; // an mt19937 generator with a seed of 0 rnd = gsl_rng_alloc(T); if (mmRef->reprand!=TRUE){ struct timeval tv; // seed generation based on time gettimeofday(&tv, 0); unsigned long mySeed=tv.tv_sec + tv.tv_usec; gsl_rng_set(rnd, mySeed); // reset seed } // printf("Anova test initialized.\n"); }