/* Look for an interior trial step along p whose function value is below fa.

   Each pass computes u = |pg * lambda * stepc| and the candidate
   stepb = 0.5 * stepc * u / ((fc - fa) + u), then moves to that candidate
   with take_step (defined elsewhere), which fills x1 and dx.

   - If the candidate point compares equal to x, the search gives up:
     *step = 0 and *f = fa.
   - If f(x1) >= fa while the candidate step is still positive, the
     candidate becomes the new (stepc, fc) and the search repeats.
   - Otherwise the candidate is accepted: *step = stepb, *f = f(x1).

   On every exit the gradient at x1 is evaluated into `gradient`.
   stepa is accepted for interface symmetry but is not read here. */
static void
intermediate_point (gsl_multimin_function_fdf * fdf,
                    const gsl_vector * x, const gsl_vector * p,
                    double lambda,
                    double pg,
                    double stepa, double stepc,
                    double fa, double fc,
                    gsl_vector * x1, gsl_vector * dx, gsl_vector * gradient,
                    double * step, double * f)
{
  double trial_step, trial_value;

  for (;;)
    {
      const double u = fabs (pg * lambda * stepc);

      trial_step = 0.5 * stepc * u / ((fc - fa) + u);

      take_step (x, p, trial_step, lambda, x1, dx);

      if (gsl_vector_equal (x, x1))
        {
          /* The trial point did not move away from the start: bail out */
#ifdef DEBUG
          printf ("fast exit x == x1 for stepb = %g\n", trial_step);
#endif
          *step = 0;
          *f = fa;
          GSL_MULTIMIN_FN_EVAL_DF(fdf, x1, gradient);
          return ;
        }

      trial_value = GSL_MULTIMIN_FN_EVAL_F (fdf, x1);

#ifdef DEBUG
      printf ("trying stepb = %g fb = %.18e\n", trial_step, trial_value);
#endif

      /* Written as a negated conjunction so that a NaN function value
         leaves the loop, exactly as the comparison would fail. */
      if (!(trial_value >= fa && trial_step > 0.0))
        {
          break;
        }

      /* Downhill step failed: shrink the bracket and try again */
      fc = trial_value;
      stepc = trial_step;
    }

#ifdef DEBUG
  printf ("ok!\n");
#endif

  *step = trial_step;
  *f = trial_value;
  GSL_MULTIMIN_FN_EVAL_DF(fdf, x1, gradient);
}
void nested_mpi::make_chain(){ //std::cout << "now we are making the chain....\n" << std::flush; int converged=0; do { choose_cpu_step(); take_step(); // If dump info, do it here and reset counters... if (dump_nb_mpi >= dump_lenght && myid == 0) { std::string name_here; calc_weights(); name_here=tag_name+"_pre_pre_chain"; dump_nb_mpi=0; //warning/N.B - these saves save from the start of the chain each time! save();save_pre(name_here); } if (conv_nb_mpi >= conv_lenght) { calc_weights(); conv_nb_mpi=0; converged=check_convergence(); if (myid == 0) if (verbose >= 1) std::cout<<"Convergence test:"<<converged<<"\n"<<std::flush; } } while ( steps_taken<=nb_steps && converged == 0 ); if (myid == 0) { std::cout<<"Finished:\n"<<std::flush; std::string name_here; // Now use calc_final_weights calc_final_weights(); name_here=tag_name+"_pre_pre_final_chain"; dump_nb_mpi=0; save();save_pre(name_here); if (verbose >= 1) std::cout<<"Saved\n"<<std::flush; } }
/**
   @short Markov Chain Monte Carlo

   Runs chain_length iterations. Each iteration copies the current state
   into a scratch vector, perturbs it with propose(), and asks
   take_step() whether to accept the proposal given the log-likelihood
   ratio. After every iteration the parameter at index 1 of the current
   state is added to a histogram of nbins uniform bins over [min, max);
   the histogram is printed to stdout when the chain finishes.

   chain_length, nbins, min, max, propose(), ln_lik_fn() and
   take_step() are defined outside this function.

   @param rng the GSL random number generator
   @param p the vector of parameters being searched over; updated in
            place, so on return it holds the last accepted state
   @param x the vector of observations
*/
void mcmc(const gsl_rng *rng, gsl_vector *p, const gsl_vector *x)
{
  gsl_vector * p_test = gsl_vector_alloc(p->size);
  size_t i;

  gsl_histogram * hist = gsl_histogram_alloc(nbins);
  gsl_histogram_set_ranges_uniform(hist, min, max);

  for(i=0; i < chain_length; i++)
    {
      gsl_vector_memcpy(p_test,p);
      propose(rng, p_test);
      double lnLikelihoodRatio = ln_lik_fn(p_test, x) - ln_lik_fn(p, x);
      if (take_step(rng, lnLikelihoodRatio))
        {
          /* Copy the accepted proposal into the caller's vector.
             Assigning the pointer (p = p_test) would make p alias the
             scratch vector: every later proposal would then modify the
             "current" state directly, the likelihood ratio would always
             be zero, and the caller would never see the result. */
          gsl_vector_memcpy(p, p_test);
        }
      /* gsl_vector_get honours the vector's stride, unlike p->data[1] */
      gsl_histogram_increment(hist, gsl_vector_get(p, 1));
    }

  gsl_vector_free(p_test);
  gsl_histogram_fprintf(stdout, hist, "%g", "%g");
  gsl_histogram_free(hist);
}
// Follow the regularization path from `lambda` upwards, merging clusters
// whose representatives come within join_thresh of each other, until only
// one cluster remains or exactly target_cluster clusters are found.
//
// For each lambda the inner loop iterates (decreasing step size, or a
// line search, depending on linesearch_freq) until the summed gradient
// norm drops below opt_thresh. If more than maxit iterations pass, alpha
// is reset to x once; a second overrun returns the results gathered so far.
//
// When target_cluster == 0 every optimal alpha is stored in the returned
// Results; otherwise only the solution with target_cluster clusters is.
//
// The returned Results object is allocated with `new` and is not freed
// here. The four work buffers are allocated with new[] and are released
// with delete[] on every exit path.
Results* join_clusters2_restart
(double *x,//array/matrix of data
 SymNoDiag *W,//lower triangle of weight matrix
 unsigned int Px,//problem size
 double lambda,//starting point in regularization path
 double join_thresh, //tolerance for equality of points
 double opt_thresh, //tolerance for optimality
 double lambda_factor,//increase of lambda after optimality
 double smooth,//smoothing parameter
 int maxit,
 int linesearch_freq,//how often to do a linesearch? if 0, never. if
                     //n>0, do n-1 linesearch steps for every
                     //decreasing step size step. set this to 2 if
                     //unsure.
 int linesearch_points,//how many points to check along the gradient
                       //direction. set to 10 if unsure.
 int check_splits,
 int target_cluster,
 int verbose
 ){
  unsigned int N = W->N;
  //W->print();
  double old_lambda=0;
  std::vector<int> rows,rowsj;
  std::vector<int>::iterator ri,rj;
  std::list< std::vector<int> > clusters;
  std::list< std::vector<int> >::iterator it,cj;
  unsigned int i,k,j;
  int tried_restart;
  //start with every point in its own cluster
  for(i=0;i<N;i++){
    rows.assign(1,i);
    clusters.push_back(rows);
  }
  //work buffers: allocated with new[], so they must be freed with delete[]
  double *old_alpha = new double[N*Px];
  double *alpha = new double[N*Px];
  double *xbar = new double[N*Px];
  double *dir = new double[N*Px];
  for(i=0;i<N*Px;i++){
    alpha[i]=xbar[i]=x[i];
  }
  Matrix amat(alpha,N,Px),xmat(x,N,Px);
  SymNoDiag diffs(N);
  diffs.calc_diffs(clusters,amat,nrm2);
  //store initial trivial solution
  Results *results = new Results(N,Px,opt_thresh);
  if(target_cluster==0)results->add(alpha,0,0);
  double weight,diff,step;
  while(clusters.size()>1){
    double grad=opt_thresh;
    int iteration=1;
    tried_restart=0;
    //if we use the general (slower) algorithm for any weights, then
    //split the clusters to individual points
    if(check_splits){
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
        rows.assign(1,i);
        clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
        xbar[i]=x[i];
      }
    }
    while(grad>=opt_thresh){
      //first calc gradients
      grad = 0;
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        i = rows[0];
        for(k=0;k<Px;k++){
          dir[i+k*N] = xbar[i+k*N] - alpha[i+k*N];
        }
        for(cj=clusters.begin();cj!=clusters.end();cj++){
          if(it!=cj){
            rowsj = *cj;
            j=rowsj[0];
            weight=0;
            diff = *diffs(i,j);
            if(diff!=0){
              if(smooth!=0){
                diff *= diff; //now squared l2 norm
                diff += smooth; //add smoothing parameter under sqrt
                diff = sqrt(diff);//put sqrt back
              }
              for(ri=rows.begin();ri!=rows.end();ri++){
                for(rj=rowsj.begin();rj!=rowsj.end();rj++){
                  weight += W->getval(*ri,*rj);
                }
              }
              //weight *= lambda / diff / ((double)(N-1)) / ((double)rows.size());
              weight *= lambda / diff / ((double)rows.size());
              for(k=0;k<Px;k++){
                dir[i+k*N] += weight * (alpha[j+k*N]-alpha[i+k*N]);
              }
            }
          }
        }
        grad += nrm2(Array(dir+i,N,Px));
      }
      //store this iteration
      //results->add(alpha,lambda,grad);
      //then take a step
      if(linesearch_freq==0 || (iteration % linesearch_freq)==0 ){
        //Decreasing step size
        //TDH and pierre 18 jan 2011 try sqrt dec step size
        step=1/((double)iteration);
        //step=1/sqrt((double)iteration);
        if(verbose>=2)printf("grad %f step %f it %d\n",grad,step,iteration);
        take_step(clusters,alpha,dir,N,Px,step);
      }else{
        double cost_here,cost_step;
        //maps cost -> step, so begin() is always the cheapest step seen
        std::map<double,double> cost_steps;
        std::map<double,double>::iterator step1,step2;
        for(i=0;i<N*Px;i++)old_alpha[i]=alpha[i];//copy alpha
        //compare current cost to cost after stepping in gradient direction
        cost_here=cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
        step = 0;
        cost_steps.insert(std::pair<double,double>(cost_here,0));
        while(cost_step<=cost_here){
          take_step(clusters,alpha,dir,N,Px,1);
          step += 1;
          diffs.calc_diffs(clusters,amat,nrm2);
          cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
          if(verbose>=2)
            printf("cost %.10f step %f cost_here %f\n",cost_step,step,cost_here);
          cost_steps.insert(std::pair<double,double>(cost_step,step));
        }
        for(int cuts=0;cuts<linesearch_points;cuts++){
          //try the midpoint of the two cheapest steps so far
          step1=step2=cost_steps.begin();
          step2++;
          step = (step1->second + step2->second)/2;
          for(i=0;i<N*Px;i++){
            alpha[i]=old_alpha[i];
          }
          take_step(clusters,alpha,dir,N,Px,step);
          diffs.calc_diffs(clusters,amat,nrm2);
          cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
          if(verbose>=2)printf("cost %.10f step %f %d\n",cost_step,step,cuts);
          cost_steps.insert(std::pair<double,double>(cost_step,step));
        }
        cost_steps.clear();
      }
      if(iteration++ > maxit){
        if(tried_restart){
          printf("max iteration %d exit\n",maxit);
          delete[] old_alpha;
          delete[] alpha;
          delete[] xbar;
          delete[] dir;
          return results;
        }else{
          if(verbose>=1)printf("max iterations, trying restart from x\n");
          tried_restart=1;
          iteration=1;
          for(i=0;i<N*Px;i++)alpha[i]=x[i];
        }
      }
      //calculate differences
      diffs.calc_diffs(clusters,amat,nrm2);
      //check for joins
      JoinPair tojoin;
      while(dojoin(tojoin=check_clusters_thresh(&clusters,diffs,join_thresh))){
        //if(verbose>=1)
        //  printf("join: %d %d\n",tojoin.first->front(),tojoin.second->front());
        int ni=tojoin.first->size();
        int nj=tojoin.second->size();
        i=tojoin.first->front();
        j=tojoin.second->front();
        tojoin.first->insert(tojoin.first->end(),
                             tojoin.second->begin(),
                             tojoin.second->end());
        //merged cluster takes the size-weighted mean of the two
        for(k=0;k<Px;k++){
          alpha[i+k*N] = (alpha[i+k*N]*ni + alpha[j+k*N]*nj)/(ni+nj);
          xbar[i+k*N] = (xbar[i+k*N]*ni + xbar[j+k*N]*nj)/(ni+nj);
        }
        clusters.erase(tojoin.second);
        iteration=1;
        if(clusters.size()>1){
          diffs.calc_diffs(clusters,amat,nrm2);//inefficient
        }else{
          grad=0;//so we can escape from the last optimization loop
        }
      }
    }//while(grad>=opt_thresh)
    if(verbose>=1)
      printf("solution iteration %d lambda %f nclusters %d\n",
             iteration,lambda,(int)clusters.size());
    if(target_cluster == 0){
      //for each cluster, there may be several points. we store the
      //alpha value just in the row of the first point. thus here we
      //copy this value to the other rows before copying the optimal
      //alpha to results.
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        if(rows.size()>1){
          for(i=1;i<rows.size();i++){
            for(k=0;k<Px;k++){
              alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
            }
          }
        }
      }
      results->add(alpha,lambda,grad);
    }
    //haven't yet reached the target number of clusters, multiply
    //lambda by lambda_factor and continue along the path
    if((int)clusters.size()>target_cluster){
      old_lambda=lambda;
      lambda *= lambda_factor;
    }
    //if we have passed the target cluster number then decrease
    //lambda and go look for it!
    if((int)clusters.size()<target_cluster){
      if(verbose>=1){
        printf("missed target %d, going back for it\n",target_cluster);
      }
      lambda = (lambda+old_lambda)/2;
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
        rows.assign(1,i);
        clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
        xbar[i]=x[i];
      }
    }
    //this is the number of clusters that we were looking for,
    //save and quit!
    if((int)clusters.size()==target_cluster){
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        if(rows.size()>1){
          for(i=1;i<rows.size();i++){
            for(k=0;k<Px;k++){
              alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
            }
          }
        }
      }
      results->add(alpha,lambda,grad);
      if(verbose>=1)printf("got target cluster %d exit\n",target_cluster);
      delete[] old_alpha;
      delete[] alpha;
      delete[] xbar;
      delete[] dir;
      return results;
    }
  }
  //TODO: consolidate cleanup... just use data structures that
  //automatically clean themselves up when the function exits.
  delete[] old_alpha;
  delete[] alpha;
  delete[] xbar;
  delete[] dir;
  return results;
}
/* Starting at (x0, f0) move along the direction p to find a minimum
   f(x0 - lambda * p), returning the new point x1 = x0-lambda*p,
   f1=f(x1) and g1 = grad(f) at x1.

   (stepa, fa), (stepb, fb) and (stepc, fc) are the incoming bracket.
   At most 10 trial steps are evaluated. Each trial step comes from an
   interpolation through the three best points seen so far (u, v, w) when
   that lands inside the bracket and is small enough, and from a 0.38
   section of the larger half of the bracket otherwise.

   Whenever a trial is no worse than the current best, x2/dx2 receive the
   trial point, the gradient is re-evaluated there, and *f, *step and
   *gnorm are updated. The search stops early once
   |pg * lambda / gnorm| < tol. */
static void
minimize (gsl_multimin_function_fdf * fdf,
          const gsl_vector * x, const gsl_vector * p,
          double lambda,
          double stepa, double stepb, double stepc,
          double fa, double fb, double fc, double tol,
          gsl_vector * x1, gsl_vector * dx1,
          gsl_vector * x2, gsl_vector * dx2, gsl_vector * gradient,
          double * step, double * f, double * gnorm)
{
  /* u is the best step so far, v the previous best, w the one before */
  double u = stepb;
  double v = stepa;
  double w = stepc;
  double fu = fb;
  double fv = fa;
  double fw = fc;

  /* sizes of the last two moves, used to limit the interpolated move */
  double old2 = fabs(w - v);
  double old1 = fabs(v - u);

  double stepm, fm, pg, gnorm1;

  int iter;

  gsl_vector_memcpy (x2, x1);
  gsl_vector_memcpy (dx2, dx1);

  *f = fb;
  *step = stepb;
  *gnorm = gsl_blas_dnrm2 (gradient);

  for (iter = 1; iter <= 10; iter++)
    {
      {
        double dw = w - u;
        double dv = v - u;
        double du = 0.0;

        double e1 = ((fv - fu) * dw * dw + (fu - fw) * dv * dv);
        double e2 = 2.0 * ((fv - fu) * dw + (fu - fw) * dv);

        if (e2 != 0.0)
          {
            du = e1 / e2;
          }

        if (fabs(du) < 0.5 * old2
            && ((du > 0.0 && du < (stepc - stepb))
                || (du < 0.0 && du > (stepa - stepb))))
          {
            /* interpolated move is inside the bracket and small enough */
            stepm = u + du;
          }
        else if ((stepc - stepb) > (stepb - stepa))
          {
            stepm = 0.38 * (stepc - stepb) + stepb;
          }
        else
          {
            stepm = stepb - 0.38 * (stepb - stepa);
          }
      }

      take_step (x, p, stepm, lambda, x1, dx1);

      fm = GSL_MULTIMIN_FN_EVAL_F (fdf, x1);

#ifdef DEBUG
      printf ("trying stepm = %g fm = %.18e\n", stepm, fm);
#endif

      if (fm > fb)
        {
          /* Trial is worse than the best point: keep it only as a
             secondary point and tighten the bracket on its side. */
          if (fm < fv)
            {
              w = v;
              v = stepm;
              fw = fv;
              fv = fm;
            }
          else if (fm < fw)
            {
              w = stepm;
              fw = fm;
            }

          if (stepm < stepb)
            {
              stepa = stepm;
              fa = fm;
            }
          else
            {
              stepc = stepm;
              fc = fm;
            }

          continue;
        }

      if (!(fm <= fb))
        {
          /* fm compares neither above nor below fb (e.g. NaN): stop */
          return;
        }

      /* Trial is at least as good as the best point: promote it */
      old2 = old1;
      old1 = fabs(u - stepm);
      w = v;
      v = u;
      u = stepm;
      fw = fv;
      fv = fu;
      fu = fm;

      gsl_vector_memcpy (x2, x1);
      gsl_vector_memcpy (dx2, dx1);

      GSL_MULTIMIN_FN_EVAL_DF (fdf, x1, gradient);
      gsl_blas_ddot (p, gradient, &pg);
      gnorm1 = gsl_blas_dnrm2 (gradient);

#ifdef DEBUG
      printf ("p: "); gsl_vector_fprintf(stdout, p, "%g");
      printf ("g: "); gsl_vector_fprintf(stdout, gradient, "%g");
      printf ("gnorm: %.18e\n", gnorm1);
      printf ("pg: %.18e\n", pg);
      printf ("orth: %g\n", fabs (pg * lambda/ gnorm1));
#endif

      *f = fm;
      *step = stepm;
      *gnorm = gnorm1;

      if (fabs (pg * lambda / gnorm1) < tol)
        {
#ifdef DEBUG
          printf("ok!\n");
#endif
          return; /* SUCCESS */
        }

      /* The old best point becomes the bracket end on the far side */
      if (stepm < stepb)
        {
          stepc = stepb;
          fc = fb;
        }
      else
        {
          stepa = stepb;
          fa = fb;
        }

      stepb = stepm;
      fb = fm;
    }

  /* MAX ITERATIONS */
  return;
}
/* Perform one iteration of the conjugate-gradient minimiser whose state is
   held in a conjugate_fr_state_t.

   A trial step of length state->step is taken along the downhill sense of
   the current direction p. If it lowers f the step is accepted at once and
   the step length is doubled for next time. Otherwise a line minimisation
   (intermediate_point followed by minimize) is run between the start and
   the trial point, and a new search direction is chosen: the plain
   gradient every x->size iterations, and g1 + (|g1|/|g0|)^2 * p otherwise.

   x, *f and gradient are updated in place; dx receives the step taken.
   Returns GSL_SUCCESS, or GSL_ENOPROG when the direction or the previous
   gradient has zero norm, or when no intermediate point could be found. */
static int
conjugate_fr_iterate (void *vstate, gsl_multimin_function_fdf * fdf,
                      gsl_vector * x, double *f,
                      gsl_vector * gradient, gsl_vector * dx)
{
  conjugate_fr_state_t *state = (conjugate_fr_state_t *) vstate;

  gsl_vector *const trial_x = state->x1;
  gsl_vector *const trial_dx = state->dx1;
  gsl_vector *const best_x = state->x2;
  gsl_vector *const p = state->p;
  gsl_vector *const g0 = state->g0;

  const double pnorm = state->pnorm;
  const double g0norm = state->g0norm;
  const double tol = state->tol;

  const double fa = *f;
  const double stepa = 0.0;
  const double stepc = state->step;
  double fb, fc;
  double stepb;

  double dir = -1.0;
  double lambda;
  double g1norm;
  double pg;

  if (pnorm == 0.0 || g0norm == 0.0)
    {
      gsl_vector_set_zero (dx);
      return GSL_ENOPROG;
    }

  /* Determine which direction is downhill, +p or -p */
  gsl_blas_ddot (p, gradient, &pg);

  if (pg >= 0.0)
    {
      dir = +1.0;
    }

  /* signed scale applied to p so that steps are measured in unit lengths */
  lambda = dir / pnorm;

  /* Compute new trial point at x_c= x - step * p, where p is the
     current direction */
  take_step (x, p, stepc, lambda, trial_x, dx);

  /* Evaluate function and gradient at new point xc */
  fc = GSL_MULTIMIN_FN_EVAL_F (fdf, trial_x);

  if (fc < fa)
    {
      /* Success, reduced the function value */
      state->step = stepc * 2.0;
      *f = fc;
      gsl_vector_memcpy (x, trial_x);
      GSL_MULTIMIN_FN_EVAL_DF (fdf, trial_x, gradient);
      return GSL_SUCCESS;
    }

#ifdef DEBUG
  printf ("got stepc = %g fc = %g\n", stepc, fc);
#endif

  /* Do a line minimisation in the region (xa,fa) (xc,fc) to find an
     intermediate (xb,fb) satisifying fa > fb < fc.  Choose an initial
     xb based on parabolic interpolation */
  intermediate_point (fdf, x, p, lambda, pg,
                      stepa, stepc, fa, fc, trial_x, trial_dx, gradient,
                      &stepb, &fb);

  if (stepb == 0.0)
    {
      return GSL_ENOPROG;
    }

  minimize (fdf, x, p, lambda,
            stepa, stepb, stepc, fa, fb, fc, tol,
            trial_x, trial_dx, best_x, dx, gradient,
            &(state->step), f, &g1norm);

  gsl_vector_memcpy (x, best_x);

  /* Choose a new conjugate direction for the next step */
  state->iter = (state->iter + 1) % x->size;

  if (state->iter != 0)
    {
      /* p' = g1 - beta * p, with beta = -(|g1| / |g0|)^2 */
      const double beta = -pow (g1norm / g0norm, 2.0);
      gsl_blas_dscal (-beta, p);
      gsl_blas_daxpy (1.0, gradient, p);
      state->pnorm = gsl_blas_dnrm2 (p);
    }
  else
    {
      /* periodic restart along the plain gradient */
      gsl_vector_memcpy (p, gradient);
      state->pnorm = g1norm;
    }

  state->g0norm = g1norm;
  gsl_vector_memcpy (g0, gradient);

#ifdef DEBUG
  printf ("updated conjugate directions\n");
  printf ("p: ");
  gsl_vector_fprintf (stdout, p, "%g");
  printf ("g: ");
  gsl_vector_fprintf (stdout, gradient, "%g");
#endif

  return GSL_SUCCESS;
}
/* Perform one iteration of the vector BFGS minimiser whose state is held in
   a vector_bfgs_state_t.

   A trial step of length state->step is taken along the downhill sense of
   the current direction p. If it lowers f the step is accepted at once and
   the step length is doubled for next time. Otherwise a line minimisation
   (intermediate_point followed by minimize) is run between the start and
   the trial point, after which the search direction is rebuilt: the plain
   gradient every x->size iterations, and the BFGS combination of the
   gradient, the last move dx0 = x - x0 and the gradient change
   dg0 = g - g0 otherwise. x0, g0 and g0norm are then refreshed.

   x, *f and gradient are updated in place; dx receives the step taken.
   Returns GSL_SUCCESS, or GSL_ENOPROG when the direction or the previous
   gradient has zero norm, or when no intermediate point could be found. */
static int
vector_bfgs_iterate (void *vstate, gsl_multimin_function_fdf * fdf,
                     gsl_vector * x, double *f,
                     gsl_vector * gradient, gsl_vector * dx)
{
  vector_bfgs_state_t *state = (vector_bfgs_state_t *) vstate;

  gsl_vector *const trial_x = state->x1;
  gsl_vector *const trial_dx = state->dx1;
  gsl_vector *const best_x = state->x2;
  gsl_vector *const p = state->p;
  gsl_vector *const g0 = state->g0;
  gsl_vector *const x0 = state->x0;

  const double pnorm = state->pnorm;
  const double g0norm = state->g0norm;
  const double tol = state->tol;

  const double fa = *f;
  const double stepa = 0.0;
  const double stepc = state->step;
  double fb, fc;
  double stepb;

  double dir = -1.0;
  double lambda;
  double g1norm;
  double pg;

  if (pnorm == 0.0 || g0norm == 0.0)
    {
      gsl_vector_set_zero (dx);
      return GSL_ENOPROG;
    }

  /* Determine which direction is downhill, +p or -p */
  gsl_blas_ddot (p, gradient, &pg);

  if (pg >= 0.0)
    {
      dir = +1.0;
    }

  /* signed scale applied to p so that steps are measured in unit lengths */
  lambda = dir / pnorm;

  /* Compute new trial point at x_c= x - step * p, where p is the
     current direction */
  take_step (x, p, stepc, lambda, trial_x, dx);

  /* Evaluate function and gradient at new point xc */
  fc = GSL_MULTIMIN_FN_EVAL_F (fdf, trial_x);

  if (fc < fa)
    {
      /* Success, reduced the function value */
      state->step = stepc * 2.0;
      *f = fc;
      gsl_vector_memcpy (x, trial_x);
      GSL_MULTIMIN_FN_EVAL_DF (fdf, trial_x, gradient);
      return GSL_SUCCESS;
    }

#ifdef DEBUG
  printf ("got stepc = %g fc = %g\n", stepc, fc);
#endif

  /* Do a line minimisation in the region (xa,fa) (xc,fc) to find an
     intermediate (xb,fb) satisifying fa > fb < fc.  Choose an initial
     xb based on parabolic interpolation */
  intermediate_point (fdf, x, p, lambda, pg,
                      stepa, stepc, fa, fc, trial_x, trial_dx, gradient,
                      &stepb, &fb);

  if (stepb == 0.0)
    {
      return GSL_ENOPROG;
    }

  minimize (fdf, x, p, lambda,
            stepa, stepb, stepc, fa, fb, fc, tol,
            trial_x, trial_dx, best_x, dx, gradient,
            &(state->step), f, &g1norm);

  gsl_vector_memcpy (x, best_x);

  /* Choose a new direction for the next step */
  state->iter = (state->iter + 1) % x->size;

  if (state->iter != 0)
    {
      /* This is the BFGS update: */
      /* p' = g1 - A dx - B dg */
      /* A = - (1+ dg.dg/dx.dg) B + dg.g/dx.dg */
      /* B = dx.g/dx.dg */
      gsl_vector *const dx0 = state->dx0;
      gsl_vector *const dg0 = state->dg0;

      double dx_dot_g, dg_dot_g, dx_dot_dg, dg_norm;
      double A = 0;
      double B = 0;

      /* dx0 = x - x0 */
      gsl_vector_memcpy (dx0, x);
      gsl_blas_daxpy (-1.0, x0, dx0);

      /* dg0 = g - g0 */
      gsl_vector_memcpy (dg0, gradient);
      gsl_blas_daxpy (-1.0, g0, dg0);

      gsl_blas_ddot (dx0, gradient, &dx_dot_g);
      gsl_blas_ddot (dg0, gradient, &dg_dot_g);
      gsl_blas_ddot (dx0, dg0, &dx_dot_dg);

      dg_norm = gsl_blas_dnrm2 (dg0);

      /* With dx.dg == 0 both coefficients stay zero and p' = g1 */
      if (dx_dot_dg != 0)
        {
          B = dx_dot_g / dx_dot_dg;
          A = -(1.0 + dg_norm * dg_norm / dx_dot_dg) * B
            + dg_dot_g / dx_dot_dg;
        }

      gsl_vector_memcpy (p, gradient);
      gsl_blas_daxpy (-A, dx0, p);
      gsl_blas_daxpy (-B, dg0, p);

      state->pnorm = gsl_blas_dnrm2 (p);
    }
  else
    {
      /* periodic restart along the plain gradient */
      gsl_vector_memcpy (p, gradient);
      state->pnorm = g1norm;
    }

  /* Remember this point and gradient for the next update */
  gsl_vector_memcpy (g0, gradient);
  gsl_vector_memcpy (x0, x);
  state->g0norm = gsl_blas_dnrm2 (g0);

#ifdef DEBUG
  printf ("updated directions\n");
  printf ("p: ");
  gsl_vector_fprintf (stdout, p, "%g");
  printf ("g: ");
  gsl_vector_fprintf (stdout, gradient, "%g");
#endif

  return GSL_SUCCESS;
}