int AnovaTest::anovaresi(gsl_matrix *bY, const unsigned int i) { unsigned int hid=i, aid = i-1; // count the right-hand tails calcSS(bY, &(Hats[aid]), mmRef); calcSS(bY, &(Hats[hid]), mmRef); testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, &(bMultStat), bStatj); // count data related to P-values if (bMultStat >= multstat[aid]) Pmultstat[aid]++; // get result ptr corresponds to model i double *sj = gsl_matrix_ptr (statj, aid, 0); double *pj = gsl_matrix_ptr (Pstatj, aid, 0); double *bj = gsl_vector_ptr (bStatj, 0); calcAdjustP(mmRef->punit, nVars, bj, sj, pj, sortid[aid]); return 0; }
int AnovaTest::anovacase(gsl_matrix *bY, gsl_matrix *bX) { unsigned int j; // if Y col is all zeros for ( j=0; j<nVars; j++ ){ gsl_vector_view colj = gsl_matrix_column(bY, j); if ( gsl_vector_isnull(&colj.vector) == TRUE ) return GSL_ERANGE; } unsigned int i, hid, aid; double *sj, *pj, *bj; gsl_matrix *Z = gsl_matrix_alloc(nRows, nVars); gsl_matrix_memcpy(Z, bY); // Hats.X for (i=0; i<nModels-1; i++){ hid = i+1; aid = i; gsl_vector_view ref1 = gsl_matrix_row(inRef, aid); subX(bX, &ref1.vector, Hats[aid].X); gsl_vector_view ref0 = gsl_matrix_row(inRef, hid); subX(bX, &ref0.vector, Hats[hid].X); //Y = X*coef gsl_blas_dgemm(CblasNoTrans,CblasNoTrans,-1.0,Hats[aid].X,Hats[aid].Coef,0.0,Z); //Z = bY - Yhat; gsl_matrix_add (Z, bY); // calc teststats calcSS(Z, &(Hats[hid]), mmRef); calcSS(Z, &(Hats[aid]), mmRef); testStatCalc(&(Hats[hid]), &(Hats[aid]), mmRef, TRUE, &(bMultStat), bStatj); if (bMultStat >= multstat[i]) Pmultstat[i]++; sj = gsl_matrix_ptr (statj, i, 0); pj = gsl_matrix_ptr (Pstatj, i, 0); bj = gsl_vector_ptr (bStatj, 0); calcAdjustP(mmRef->punit, nVars, bj, sj, pj, sortid[i]); } gsl_matrix_free(Z); return 0; }
int GlmTest::summary(glm *fit) { double lambda; unsigned int k; unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam; unsigned int mtype = fit->mmRef->model-1; PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef); BinGlm binNull(fit->mmRef), binAlt(fit->mmRef); NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef); glm *PtrNull[3] = { &pNull, &nbNull, &binNull }; glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt }; gsl_vector_view teststat, unitstat; gsl_matrix_view L1; // To estimate initial Beta from PtrNull->Beta // gsl_vector *ref=gsl_vector_alloc(nParam); // gsl_matrix *BetaO=gsl_matrix_alloc(nParam, nVars); smryStat = gsl_matrix_alloc((nParam+1), nVars+1); Psmry = gsl_matrix_alloc((nParam+1), nVars+1); gsl_matrix_set_zero (Psmry); // initialize the design matrix for all hypo tests GrpMat *GrpXs = (GrpMat *)malloc((nParam+2)*sizeof(GrpMat)); GrpXs[0].matrix = gsl_matrix_alloc(nRows, nParam); gsl_matrix_memcpy(GrpXs[0].matrix, fit->Xref); // the alt X GrpXs[1].matrix = gsl_matrix_alloc(nRows, 1); // overall test gsl_matrix_set_all (GrpXs[1].matrix, 1.0); for (k=2; k<nParam+2; k++) { // significance tests GrpXs[k].matrix = gsl_matrix_alloc(nRows, nParam-1); subX2(fit->Xref, k-2, GrpXs[k].matrix); } // Calc test statistics if ( tm->test == WALD ) { // the overall test compares to mean teststat = gsl_matrix_row(smryStat, 0); L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam); lambda=gsl_vector_get(tm->smry_lambda, 0); GetR(fit->Res, tm->corr, lambda, Rlambda); GeeWald(fit, &L1.matrix, &teststat.vector); // the significance test for (k=2; k<nParam+2; k++) { teststat = gsl_matrix_row(smryStat, k-1); L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam); GeeWald(fit, &L1.matrix, &teststat.vector); } } else if (tm->test==SCORE) { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(smryStat, k-1); PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); lambda=gsl_vector_get(tm->smry_lambda, k); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector); } } else { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(smryStat, k-1); PtrNull[mtype]->regression(fit->Yref,GrpXs[k].matrix,fit->Oref,NULL); GeeLR(fit, PtrNull[mtype], &teststat.vector); // works better } } // sort id if the unitvaraite test is free step-down gsl_permutation **sortid; sortid=(gsl_permutation **)malloc((nParam+1)*sizeof(gsl_permutation *)); for ( k=0; k<(nParam+1); k++ ) { teststat = gsl_matrix_row (smryStat, k); unitstat = gsl_vector_subvector(&teststat.vector, 1, nVars); sortid[k] = gsl_permutation_alloc(nVars); gsl_sort_vector_index (sortid[k], &unitstat.vector); gsl_permutation_reverse(sortid[k]); // rearrange in descending order } if (tm->resamp==MONTECARLO) { lambda=gsl_vector_get(tm->smry_lambda,0); GetR(fit->Res, tm->corr, lambda, Sigma); setMonteCarlo(fit, XBeta, Sigma); } nSamp=0; double *suj, *buj, *puj; gsl_matrix *bStat = gsl_matrix_alloc((nParam+1), nVars+1); gsl_matrix_set_zero (bStat); gsl_matrix *bY = gsl_matrix_alloc(nRows, nVars); gsl_matrix *bO = gsl_matrix_alloc(nRows, nVars); gsl_matrix_memcpy (bO, fit->Eta); double diff, timelast=0; clock_t clk_start=clock(); for ( unsigned int i=0; i<tm->nboot; i++) { if ( tm->resamp==CASEBOOT ) resampSmryCase(fit,bY,GrpXs,bO,i); else resampNonCase(fit, bY, i); if ( tm->test == WALD ) { PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL); // the overall test compares to mean teststat = gsl_matrix_row(bStat, 0); L1=gsl_matrix_submatrix(L,1,0,nParam-1,nParam); lambda=gsl_vector_get(tm->smry_lambda, 0); GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector); // the significance test for (k=2; k<nParam+2; k++) { teststat = gsl_matrix_row(bStat, k-1); L1 = gsl_matrix_submatrix(L, k-2, 0, 1, nParam); GeeWald(PtrAlt[mtype], &L1.matrix, &teststat.vector); } } else if (tm->test==SCORE) { for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(bStat, k-1); PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); lambda=gsl_vector_get(tm->smry_lambda,k); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(GrpXs[0].matrix, PtrNull[mtype], &teststat.vector); } } else { // use single bAlt estimate works better PtrAlt[mtype]->regression(bY,GrpXs[0].matrix,bO,NULL); for (k=1; k<nParam+2; k++) { teststat=gsl_matrix_row(bStat, k-1); PtrNull[mtype]->regression(bY,GrpXs[k].matrix,bO,NULL); GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); } } for (k=0; k<(nParam+1); k++) { buj = gsl_matrix_ptr (bStat, k, 0); suj = gsl_matrix_ptr (smryStat, k, 0); puj = gsl_matrix_ptr (Psmry, k, 0); if ( *buj >= *suj ) *puj=*puj+1; calcAdjustP(tm->punit, nVars, buj+1, suj+1, puj+1, sortid[k]); } // end for j loop nSamp++; // Prompts if ((tm->showtime==TRUE)&(i%100==0)) { diff=(float)(clock()-clk_start)/(float)CLOCKS_PER_SEC; timelast+=(double)diff/60; printf("\tResampling run %d finished. Time elapsed: %.2f min ...\n", i, timelast); clk_start=clock(); } } // end for i loop // ========= Get P-values ========= // if ( tm->punit == FREESTEP ) { for (k=0; k<(nParam+1); k++) { puj = gsl_matrix_ptr (Psmry, k, 1); reinforceP( puj, nVars, sortid[k] ); } } // p = (#exceeding observed stat + 1)/(#nboot+1) gsl_matrix_add_constant (Psmry, 1.0); gsl_matrix_scale (Psmry, (double)1.0/(nSamp+1)); for (k=0; k<nVars; k++) aic[k]=-fit->ll[k]+2*(nParam+1); // === release memory ==== // PtrAlt[mtype]->releaseGlm(); if ( tm->test!=WALD ) PtrNull[mtype]->releaseGlm(); gsl_matrix_free(bStat); gsl_matrix_free(bY); gsl_matrix_free(bO); for (k=0; k<nParam+1; k++) if (sortid[k]!=NULL) gsl_permutation_free(sortid[k]); free(sortid); if ( GrpXs != NULL ) { for ( unsigned int k=0; k<nParam+2; k++ ) if ( GrpXs[k].matrix != NULL ) gsl_matrix_free (GrpXs[k].matrix); free(GrpXs); } return SUCCESS; }
int GlmTest::anova(glm *fit, gsl_matrix *isXvarIn) { // Assume the models have been already sorted (in R) Xin = isXvarIn; nModels = Xin->size1; double *rdf = new double [nModels]; unsigned int nP, i, j, k; unsigned int ID0, ID1, nP0, nP1; unsigned int nRows=tm->nRows, nVars=tm->nVars, nParam=tm->nParam; unsigned int mtype = fit->mmRef->model-1; dfDiff = new unsigned int [nModels-1]; anovaStat = gsl_matrix_alloc((nModels-1), nVars+1); Panova = gsl_matrix_alloc((nModels-1), nVars+1); gsl_vector *bStat = gsl_vector_alloc(nVars+1); gsl_matrix_set_zero (anovaStat); gsl_matrix_set_zero (Panova); gsl_vector_set_zero (bStat); PoissonGlm pNull(fit->mmRef), pAlt(fit->mmRef); BinGlm binNull(fit->mmRef), binAlt(fit->mmRef); NBinGlm nbNull(fit->mmRef), nbAlt(fit->mmRef); PoissonGlm pNullb(fit->mmRef), pAltb(fit->mmRef); BinGlm binNullb(fit->mmRef), binAltb(fit->mmRef); NBinGlm nbNullb(fit->mmRef), nbAltb(fit->mmRef); glm *PtrNull[3] = { &pNull, &nbNull, &binNull }; glm *PtrAlt[3] = { &pAlt, &nbAlt, &binAlt }; glm *bNull[3] = { &pNullb, &nbNullb, &binNullb }; glm *bAlt[3] = { &pAltb, &nbAltb, &binAltb }; double *suj, *buj, *puj; gsl_vector_view teststat, unitstat,ref1, ref0; gsl_matrix *X0=NULL, *X1=NULL, *L1=NULL, *tmp1=NULL, *BetaO=NULL; gsl_matrix *bO=NULL, *bY=gsl_matrix_alloc(nRows, nVars); bO = gsl_matrix_alloc(nRows, nVars); gsl_permutation *sortid=NULL; if (tm->punit==FREESTEP) sortid = gsl_permutation_alloc(nVars); // ======= Fit the (first) Alt model =========// for (i=0; i<nModels; i++) { nP = 0; for (k=0; k<nParam; k++) if (gsl_matrix_get(Xin,i,k)!=FALSE) nP++; rdf[i] = nRows-nP; } for (i=1; i<nModels; i++) { // ======= Fit the Null model =========// ID0 = i; ID1 = i-1; nP0 = nRows - (unsigned int)rdf[ID0]; nP1 = nRows - (unsigned int)rdf[ID1]; // Degrees of freedom dfDiff[i-1] = nP1 - nP0; ref1=gsl_matrix_row(Xin, ID1); ref0=gsl_matrix_row(Xin, ID0); X0 = gsl_matrix_alloc(nRows, nP0); subX(fit->Xref, &ref0.vector, X0); X1 = gsl_matrix_alloc(nRows, nP1); subX(fit->Xref, &ref1.vector, X1); // ======= Get multivariate test statistics =======// // Estimate shrinkage parametr only once under H1 // See "FW: Doubts R package "mvabund" (12/14/11) teststat = gsl_matrix_row(anovaStat, (i-1)); PtrNull[mtype]->regression(fit->Yref, X0, fit->Oref, NULL); if (tm->test == SCORE) { lambda = gsl_vector_get(tm->anova_lambda, ID0); GetR(PtrNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(X1, PtrNull[mtype], &teststat.vector); } else if (tm->test==WALD) { PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, NULL); L1 = gsl_matrix_alloc (nP1-nP0, nP1); tmp1 = gsl_matrix_alloc (nParam, nP1); subX(L, &ref1.vector, tmp1); subXrow1(tmp1, &ref0.vector, &ref1.vector, L1); lambda = gsl_vector_get(tm->anova_lambda, ID1); GetR(PtrAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(PtrAlt[mtype], L1, &teststat.vector); } else { BetaO = gsl_matrix_alloc(nP1, nVars); addXrow2(PtrNull[mtype]->Beta, &ref1.vector, BetaO); PtrAlt[mtype]->regression(fit->Yref, X1, fit->Oref, BetaO); GeeLR(PtrAlt[mtype], PtrNull[mtype], &teststat.vector); } if (tm->resamp==MONTECARLO) { lambda=gsl_vector_get(tm->anova_lambda,ID0); GetR(fit->Res, tm->corr, lambda, Sigma); setMonteCarlo (PtrNull[mtype], XBeta, Sigma); } // ======= Get univariate test statistics =======// if (tm->punit == FREESTEP) { unitstat=gsl_vector_subvector(&teststat.vector,1,nVars); gsl_sort_vector_index (sortid, &unitstat.vector); gsl_permutation_reverse(sortid); } // ======= Get resampling distribution under H0 ===== // nSamp=0; double dif, timelast=0; clock_t clk_start=clock(); if (tm->showtime==TRUE) printf("Resampling begins for test %d.\n", i); for (j=0; j<tm->nboot; j++) { // printf("simu %d :", j); gsl_vector_set_zero (bStat); if ( tm->resamp == CASEBOOT ) { resampAnovaCase(PtrAlt[mtype],bY,X1,bO,j); subX(X1, &ref0.vector, X0); } else { resampNonCase(PtrNull[mtype], bY, j); gsl_matrix_memcpy(bO, fit->Oref); } if ( tm->test == WALD ) { bAlt[mtype]->regression(bY,X1,bO,NULL); lambda = gsl_vector_get(tm->anova_lambda, ID1); GetR(bAlt[mtype]->Res, tm->corr, lambda, Rlambda); GeeWald(bAlt[mtype], L1, bStat); } else if ( tm->test == SCORE ) { bNull[mtype]->regression(bY,X0,bO,NULL); lambda = gsl_vector_get(tm->anova_lambda, ID0); GetR(bNull[mtype]->Res, tm->corr, lambda, Rlambda); GeeScore(X1, bNull[mtype], bStat); } else { bNull[mtype]->regression(bY,X0,bO,NULL); addXrow2(bNull[mtype]->Beta, &ref1.vector, BetaO); bAlt[mtype]->regression(bY,X1,bO,BetaO); GeeLR(bAlt[mtype], bNull[mtype], bStat); } // ----- get multivariate counts ------- // buj = gsl_vector_ptr (bStat,0); suj = gsl_matrix_ptr (anovaStat, i-1, 0); puj = gsl_matrix_ptr (Panova, i-1, 0); if ( *(buj) > (*(suj)-1e-8) ) *puj=*puj+1; // ------ get univariate counts ---------// calcAdjustP(tm->punit,nVars,buj+1,suj+1,puj+1,sortid); nSamp++; // Prompts if ((tm->showtime==TRUE)&(j%100==0)) { dif = (float)(clock() - clk_start)/(float)CLOCKS_PER_SEC; timelast+=(double)dif/60; printf("\tResampling run %d finished. Time elapsed: %.2f minutes...\n", j, timelast); clk_start=clock(); } } // end j for loop // ========= get p-values ======== // if ( tm->punit == FREESTEP) { puj = gsl_matrix_ptr (Panova, i-1, 1); reinforceP(puj, nVars, sortid); } if (BetaO!=NULL) gsl_matrix_free(BetaO); if (X0!=NULL) gsl_matrix_free(X0); if (X1!=NULL) gsl_matrix_free(X1); if (tm->test == WALD) { if (L1!=NULL) gsl_matrix_free(L1); if (tmp1!=NULL) gsl_matrix_free(tmp1); } } // end i for loop and test for loop // p = (#exceeding observed stat + 1)/(#nboot+1) gsl_matrix_add_constant (Panova, 1.0); gsl_matrix_scale (Panova, (double)1/(nSamp+1.0)); bAlt[mtype]->releaseGlm(); PtrAlt[mtype]->releaseGlm(); if ( tm->test!=WALD ) { bNull[mtype]->releaseGlm(); PtrNull[mtype]->releaseGlm(); } delete []rdf; if (sortid != NULL ) gsl_permutation_free(sortid); gsl_vector_free(bStat); gsl_matrix_free(bY); if (bO!=NULL) gsl_matrix_free(bO); return SUCCESS; }