/*
 * Small demo driver: prints the results of the arithmetic helpers, then
 * builds a 12-element vector, prints it, sorts it and prints it again.
 */
int main()
{
    // Seed the C library PRNG from the current time.
    // NOTE(review): presumably random_vector() draws from rand() - confirm.
    srand(time(0));

    // Same arguments fed to both addition helpers.
    const auto plain_sum = add(1.12, 2.0);
    std::cout << plain_sum << std::endl;
    const auto auto_sum = add_auto(1.12, 2.0);
    std::cout << auto_sum << std::endl;

    // Exponentiation checks, including a zero exponent.
    std::cout << "POW! 2^10 " << pow(2, 10) << std::endl;
    std::cout << "8^0 " << pow(8, 0) << std::endl;

    // Show the vector before and after sorting.
    std::vector<double> samples = random_vector(12);
    print_vector(samples);

    sort_vector(samples);
    std::cout << "Sorted: " << std::endl;
    print_vector(samples);

    return 0;
}
//this IS the EM...estimate away int classifier::fit2(segment * data, vector<double> mu_seeds, int topology, int elon_move ){ //========================================================================= //compute just a uniform model...no need for the EM if (K == 0){ ll = 0; double l = (data->maxX-data->minX); double pos = 0; double neg = 0; for (int i = 0; i < data->XN; i++){ pos+=data->X[1][i]; neg+=data->X[2][i]; } double pi = pos / (pos + neg); for (int i = 0; i < data->XN; i++){ if (pi > 0){ ll+=log(pi / l)*data->X[1][i]; } if (pi < 1){ ll+=log((1-pi) / l)*data->X[2][i]; } } components = new component[1]; return 1; } random_device rd; mt19937 mt(rd()); int add = noise_max>0; components = new component[K+add]; //=========================================================================== //initialize(1) components with user defined hyperparameters for (int k = 0; k < K; k++){ components[k].set_priors(ALPHA_0, BETA_0, ALPHA_1, BETA_1, ALPHA_2, ALPHA_3,data->N, K); } //=========================================================================== //random seeding, initialize(2), center of pausing components int i = 0; double mu; double mus[K]; for (int k = 0; k < K; k++){ if (mu_seeds.size()>0 ){ i = sample_centers(mu_seeds , p); mu = mu_seeds[i]; if (r_mu > 0){ normal_distribution<double> dist_r_mu(mu, r_mu); mu = dist_r_mu(mt); } }else{ normal_distribution<double> dist_MU((data->minX+data->maxX)/2., r_mu); mu = dist_MU(mt); } mus[k] = mu; if (mu_seeds.size() > 0 ){ mu_seeds.erase (mu_seeds.begin()+i); } } sort_vector(mus, K); for (int k = 0; k < K;k++){ //random seeding, intializ(3) other parameters components[k].initialize_bounds(mus[k], data, K, data->SCALE , 0., topology,foot_print, data->maxX, data->maxX); } sort_components(components, K); for (int k = 0; k < K; k++){ if (k > 0){ components[k].reverse_neighbor = &components[k-1]; }else{ components[k].reverse_neighbor = NULL; } if (k+1 < K){ components[k].forward_neighbor = &components[k+1]; }else{ 
components[k].forward_neighbor = NULL; } } if (add){ components[K].initialize_bounds(0., data, 0., 0. , noise_max, pi, foot_print, data->minX, data->maxX); } //=========================================================================== int t = 0; //EM loop ticker double prevll = nINF; //previous iterations log likelihood converged = false; //has the EM converged? int u = 0; //elongation movement ticker double norm_forward, norm_reverse,N; //helper variables while (t < max_iterations && not converged){ //====================================================== //reset old sufficient statistics for (int k=0; k < K+add; k++){ //components[k].print(); components[k].reset(); if (components[k].EXIT){ converged=false, ll=nINF; return 0; } } //====================================================== //E-step, grab all the stats and responsiblities ll = 0; for (int i =0; i < data->XN;i++){ norm_forward=0; norm_reverse=0; for (int k=0; k < K+add; k++){ //computing the responsbility terms if (data->X[1][i]){//if there is actually data point here... norm_forward+=components[k].evaluate(data->X[0][i],1); } if (data->X[2][i]){//if there is actually data point here... 
norm_reverse+=components[k].evaluate(data->X[0][i],-1); } } if (norm_forward > 0){ ll+=LOG(norm_forward)*data->X[1][i]; } if (norm_reverse > 0){ ll+=LOG(norm_reverse)*data->X[2][i]; } //now we need to add the sufficient statistics, need to compute expectations for (int k=0; k < K+add; k++){ if (norm_forward){ components[k].add_stats(data->X[0][i], data->X[1][i], 1, norm_forward); } if (norm_reverse){ components[k].add_stats(data->X[0][i], data->X[2][i], -1, norm_reverse); } } } //====================================================== //M-step N=0; //get normalizing constant for (int k = 0; k < K+add; k++){ N+=(components[k].get_all_repo()); } for (int k = 0; k < K+add; k++){ components[k].update_parameters(N, K); } if (abs(ll-prevll)<convergence_threshold){ converged=true; } if (not isfinite(ll)){ ll = nINF; return 0; } //====================================================== //should we try to move the uniform component? if (u > 200 ){ sort_components(components, K); //check_mu_positions(components, K); if (elon_move){ update_j_k(components,data, K, N); update_l(components, data, K); } u = 0; } u++; t++; prevll=ll; } return 1; }
void adj_by_T(GENE_DATA* pdata,float* T,float* P,float*Adj_P, FUNC_STAT func_stat,FUNC_SAMPLE func_first_sample, FUNC_SAMPLE func_next_sample,FUNC_CMP func_cmp,const void* extra) { int b=0,*bL,i,is_next,*total1,*R,*total2; float *bT, *count1,*count2,qT;/*qT is the successiv maxima*/ int ncol=pdata->ncol; int nrow=pdata->nrow; int B=(*func_first_sample)(NULL); /*allocate the space and initialziation*/ assert(bT=(float*)Calloc(nrow,float)); assert(bL=(int*)Calloc(ncol,int)); assert(count1=(float*)Calloc(nrow,float)); memset(count1,0,sizeof(float)*nrow); assert(total1=(int*)Calloc(nrow,int)); memset(total1,0,sizeof(int)*nrow); assert(count2=(float*)Calloc(nrow,float)); memset(count2,0,sizeof(float)*nrow); assert(total2=(int*)Calloc(nrow,int)); memset(total2,0,sizeof(int)*nrow); assert(R=(int*)Calloc(nrow,int)); /*comuter the original t-statfirst*/ compute_test_stat(pdata,pdata->L,T,func_stat,extra); /*sort the T*/ order_data(T,R,nrow,func_cmp); sort_gene_data(pdata,R); sort_vector(T,R,nrow); /*iteration for permutaion*/ (*func_first_sample)(bL); /*changed to the orignal stat, which is monotone of t and centered*/ is_next=1; b=0; while(is_next){ compute_test_stat(pdata,bL,bT,func_stat,extra); /*deal with unajdused value first*/ for(i=0;i<nrow;i++){ if(T[i]==NA_FLOAT) continue; if(bT[i]!=NA_FLOAT){ if((func_cmp==cmp_high)&&(bT[i]+EPSILON>=T[i])) count2[i]++; if((func_cmp==cmp_low)&&(bT[i]<=T[i]+EPSILON)) count2[i]++; if((func_cmp==cmp_abs)&&(fabs(bT[i])>=fabs(T[i])-EPSILON)) count2[i]++; total2[i]++; } } /*deal with adjusted values*/ qT=NA_FLOAT;/*intitalize the qT*/ for(i=nrow-1;i>=0;i--){ /*looping the row reversely*/ if(T[i]==NA_FLOAT) continue; /* right now I only implements the 3 cases, which are pretty common*/ if(func_cmp==cmp_high){ if((bT[i]!=NA_FLOAT)&&(qT!=NA_FLOAT)&&(bT[i]>qT)) qT=bT[i]; if((bT[i]!=NA_FLOAT)&&(qT==NA_FLOAT)) qT=bT[i]; if((qT!=NA_FLOAT)&&(qT>=T[i]-EPSILON)) count1[i]+=1; }else if(func_cmp==cmp_low){ 
if((bT[i]!=NA_FLOAT)&&(qT!=NA_FLOAT)&&(bT[i]<qT)) qT=bT[i]; if((bT[i]!=NA_FLOAT)&&(qT==NA_FLOAT)) qT=bT[i]; if((qT!=NA_FLOAT)&&(qT<=T[i]+EPSILON)) count1[i]+=1; }else if (func_cmp==cmp_abs) { if((bT[i]!=NA_FLOAT)&&(qT!=NA_FLOAT)&&(fabs(bT[i])>qT)) qT=fabs(bT[i]); if((bT[i]!=NA_FLOAT)&&(qT==NA_FLOAT)) qT=fabs(bT[i]); if((qT!=NA_FLOAT)&&(qT>=fabs(T[i])-EPSILON)) count1[i]+=1; } if(qT!=NA_FLOAT) total1[i]++; } b++; print_b(b,B,"b="); is_next=(*func_next_sample)(bL); } /*summarize the results*/ /*unadjusted one*/ for(i=0;i<nrow;i++){ if(total2[i]==0) P[i]=NA_FLOAT; else P[i]=count2[i]*1.0/total2[i]; } /*adjused one*/ for(i=0;i<nrow;i++){ if(total1[i]==0) Adj_P[i]=NA_FLOAT; else Adj_P[i]=count1[i]*1.0/total1[i]; } /*enforce the montonicity*/ for(i=1;i<nrow;i++) if(Adj_P[i]<Adj_P[i-1]) Adj_P[i]=Adj_P[i-1]; /*free the spaces*/ Free(bT); Free(count1); Free(total1); Free(count2); Free(total2); Free(bL); Free(R); }
void adj_pvalue_quick(GENE_DATA* pdata,float*T, float* P, float* Adj_P,float* Adj_Lower, FUNC_STAT func_stat,FUNC_STAT func_stat_T, FUNC_SAMPLE func_first_sample, FUNC_SAMPLE func_next_sample,FUNC_CMP func_cmp,const void* extra) { int *L,b,B,B_new,i,*R,neq; /*b for simulation*, neq is for the number of equal signs*/ float* all_P,*all_Q,count; int ncol=pdata->ncol,nrow=pdata->nrow; /*allocate the space*/ B=(*func_first_sample)(NULL); assert(L=(int*)Calloc(ncol,int)); assert(R=(int*)Calloc(nrow,int)); assert(all_P=(float*)Calloc(B,float)); assert(all_Q=(float*)Calloc(B,float)); /*get the original unadjusted p-values first we'll use the normalized t-statistics*/ get1pvalue(pdata,pdata->L,T,P,func_stat_T,func_first_sample,func_next_sample,func_cmp,extra); if(myDEBUG) { print_farray(stderr,T,pdata->nrow); print_farray(stderr,P,pdata->nrow); } /*sort the test_stat*/ order_mult_data(R,nrow,2,P,cmp_low,T,func_cmp); /*order_data(P,R,nrow,func_cmp);*/ /*rearrange the data according the unadjusted p-values*/ sort_gene_data(pdata,R); sort_vector(T,R,nrow); sort_vector(P,R,nrow); /*initialze all_Q[]=NA_FLOAT*/ for(b=0;b<B;b++) all_Q[b]=NA_FLOAT; /*loop for each gene*/ for(i=nrow-1;i>=0;i--){ get_all_samples_P(pdata->d[i],ncol,all_P,pdata->na, func_stat,func_first_sample,func_next_sample,func_cmp,extra); if(myDEBUG) print_farray(stderr,all_P,B); /*update all_Q*/ count=0; B_new=0; neq=0; for(b=0;b<B;b++){ if (all_P[b]==NA_FLOAT) break;/*we don't need care about NA pvlaues*/ if(all_Q[b]>all_P[b]) all_Q[b]=all_P[b];/*update q* by the value p*/ if(all_Q[b]==NA_FLOAT) continue;/*skip NA q*/ if(all_Q[b]<P[i]){ count+=1; }else if (all_Q[b]<=P[i]+EPSILON)/*it'd already > */ neq++; B_new++; } if(myDEBUG) { print_farray(stderr,all_Q,B); fprintf(stderr,"P[%d]=%5.3f,count=%5.2f,neq=%d\n",i,P[i],count,neq); } /*assign the Adj_P and Adj_Lower for gene i */ if(B_new!=0) { Adj_P[i]=(count+neq)/B_new; if(neq==0) Adj_Lower[i]=count/B_new; else Adj_Lower[i]=(count+1)/B_new; } else { 
Adj_P[i]=NA_FLOAT; Adj_Lower[i]=NA_FLOAT; } /*************************** */ print_b((nrow-i),nrow,"r="); } /* to make monotone of Adj_P and Adj_Lower*/ for(i=1;i<nrow;i++) if(Adj_P[i]<Adj_P[i-1]) Adj_P[i]=Adj_P[i-1]; for(i=1;i<nrow;i++) if(Adj_Lower[i]<Adj_Lower[i-1]) Adj_Lower[i]=Adj_Lower[i-1]; /*free the spaces*/ Free(L); Free(R); Free(all_P); Free(all_Q); }