void RateFree::setVariables(double *variables) { if (getNDim() == 0) return; int i; // Modified by Thomas on 13 May 2015 // --start-- /* variables[1] = prop[0]; for (i = 2; i < ncategory; i++) variables[i] = variables[i-1] + prop[i-1]; */ if (optimizing_params == 2) { // proportions for (i = 0; i < ncategory-1; i++) variables[i+1] = prop[i] / prop[ncategory-1]; } else if (optimizing_params == 1) { // rates for (i = 0; i < ncategory-1; i++) variables[i+1] = rates[i]; } else { // both rates and weights for (i = 0; i < ncategory-1; i++) variables[i+1] = prop[i] / prop[ncategory-1]; for (i = 0; i < ncategory-1; i++) variables[i+ncategory] = rates[i] / rates[ncategory-1]; } }
/**
 * Read the gamma shape back from the optimizer vector.
 *
 * The shape is taken from variables[1]. The category rates are recomputed
 * via computeRates() only when the stored value actually differs.
 *
 * @param variables optimizer vector (index 1 holds the shape)
 * @return true if gamma_shape was changed, false otherwise (including the
 *         case where getNDim() reports nothing to optimize)
 */
bool RateGamma::getVariables(double *variables) {
    if (getNDim() == 0)
        return false;

    const double new_shape = variables[1];
    const bool changed = (gamma_shape != new_shape);
    gamma_shape = new_shape;

    if (!changed)
        return false;

    computeRates();
    return true;
}
/*
 * Copy the strides of a NumPy array into a caller-supplied int buffer.
 *
 * arr      the array object, passed as an opaque pointer
 * strides  output buffer; must have room for getNDim(arr) entries
 *
 * Each stride is narrowed from npy_intp to int, so values that do not
 * fit in an int are truncated.
 *
 * Always returns 0.
 */
int getStrides( void* arr, int* strides ) {
    npy_intp* np_strides = PyArray_STRIDES( arr );
    /* The dimension count is loop-invariant: query it once, as getSize does. */
    int nd = getNDim( arr );
    int i;
    for( i=0; i<nd; i++ ) {
        strides[i] = (int)np_strides[i];
    }
    return 0;
}
/*
 * Return the total number of elements of a NumPy array, computed as the
 * product of its dimensions.
 *
 * arr  the array object, passed as an opaque pointer
 *
 * Each extent is narrowed from npy_intp to int before multiplying, and the
 * product itself is an int. An array for which getNDim(arr) is 0 yields 1.
 */
int getSize( void* arr ) {
    npy_intp* extents = PyArray_DIMS( arr );
    int total = 1;
    int rank = getNDim( arr );
    int axis = 0;
    while( axis < rank ) {
        total *= (int)extents[axis];
        axis++;
    }
    return total;
}
/*
 * Copy the shape of a NumPy array into a caller-supplied int buffer.
 *
 * arr   the array object, passed as an opaque pointer
 * dims  output buffer; must have room for getNDim(arr) entries
 *
 * Each extent is narrowed from npy_intp to int, so values that do not
 * fit in an int are truncated.
 *
 * Always returns 0.
 */
int getDims( void* arr, int* dims ) {
    npy_intp* np_dims = PyArray_DIMS( arr );
    /* The dimension count is loop-invariant: query it once, as getSize does. */
    int nd = getNDim( arr );
    int i;
    for( i=0; i<nd; i++ ) {
        dims[i] = (int)np_dims[i];
    }
    return 0;
}
double RateGammaInvar::optimizeParameters(double gradient_epsilon) { int ndim = getNDim(); // return if nothing to be optimized if (ndim == 0) return phylo_tree->computeLikelihood(); if (verbose_mode >= VB_MED) cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl; if (optimize_alg.find("EM_RR") != string::npos) { return randomRestartOptimization(gradient_epsilon); } else if (optimize_alg.find("Brent") != string::npos || phylo_tree->aln->frac_const_sites == 0.0 || isFixPInvar() || isFixGammaShape()) { double lh = phylo_tree->computeLikelihood(); cur_optimize = 0; double gamma_lh = RateGamma::optimizeParameters(gradient_epsilon); ASSERT(gamma_lh >= lh-0.1); cur_optimize = 1; double invar_lh = -DBL_MAX; invar_lh = RateInvar::optimizeParameters(gradient_epsilon); ASSERT(invar_lh >= gamma_lh-0.1); cur_optimize = 0; return invar_lh; } else if (optimize_alg.find("EM") != string::npos) { return optimizeWithEM(gradient_epsilon); } else if (optimize_alg.find("BFGS") != string::npos) { //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); double *variables = new double[ndim+1]; double *upper_bound = new double[ndim+1]; double *lower_bound = new double[ndim+1]; bool *bound_check = new bool[ndim+1]; double score; // by BFGS algorithm setVariables(variables); setBounds(lower_bound, upper_bound, bound_check); score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_GAMMA_SHAPE)); getVariables(variables); phylo_tree->clearAllPartialLH(); score = phylo_tree->computeLikelihood(); delete [] bound_check; delete [] lower_bound; delete [] upper_bound; delete [] variables; return score; } else { string errMsg = "Unknown optimization algorithm: " + optimize_alg; outError(errMsg.c_str()); return 0.0; } }
double NGSRateCat::optimizeParameters(double epsilon) { int ndim = getNDim(); // return if nothing to be optimized if (ndim == 0) return 0.0; cout << "Optimizing " << name << " model parameters..." << endl; double *variables = new double[ndim+1]; double *upper_bound = new double[ndim+1]; double *lower_bound = new double[ndim+1]; bool *bound_check = new bool[ndim+1]; int i; double score; // by BFGS algorithm setVariables(variables); for (i = 1; i <= ndim; i++) { //cout << variables[i] << endl; lower_bound[i] = 1e-4; upper_bound[i] = 100.0; bound_check[i] = false; } for (i = ndim-ncategory+2; i <= ndim; i++) upper_bound[i] = 1.0; //packData(variables, lower_bound, upper_bound, bound_check); score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(epsilon, 1e-6)); getVariables(variables); delete [] bound_check; delete [] lower_bound; delete [] upper_bound; delete [] variables; return score; }
void RateFree::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) { if (getNDim() == 0) return; int i; if (optimizing_params == 2) { // proportions for (i = 1; i < ncategory; i++) { lower_bound[i] = MIN_FREE_RATE_PROP; upper_bound[i] = MAX_FREE_RATE_PROP; bound_check[i] = false; } } else if (optimizing_params == 1){ // rates for (i = 1; i < ncategory; i++) { lower_bound[i] = MIN_FREE_RATE; upper_bound[i] = MAX_FREE_RATE; bound_check[i] = false; } } else { // both weights and rates for (i = 1; i < ncategory; i++) { lower_bound[i] = MIN_FREE_RATE_PROP; upper_bound[i] = MAX_FREE_RATE_PROP; bound_check[i] = false; } for (i = 1; i < ncategory; i++) { lower_bound[i+ncategory-1] = MIN_FREE_RATE; upper_bound[i+ncategory-1] = MAX_FREE_RATE; bound_check[i+ncategory-1] = false; } } // for (i = ncategory; i <= 2*ncategory-2; i++) { // lower_bound[i] = MIN_FREE_RATE; // upper_bound[i] = MAX_FREE_RATE; // bound_check[i] = false; // } }
/**
 * Fill the optimizer bounds for the single variable of this model
 * (stored at index 1).
 *
 * The lower bound is MIN_PINVAR and the upper bound is the alignment's
 * frac_const_sites. Nothing is written when getNDim() is 0.
 *
 * @param lower_bound output: lower bounds
 * @param upper_bound output: upper bounds
 * @param bound_check output: per-variable flag, set to false
 */
void RateInvar::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
    if (getNDim() != 0) {
        bound_check[1] = false;
        upper_bound[1] = phylo_tree->aln->frac_const_sites;
        lower_bound[1] = MIN_PINVAR;
    }
}
/**
 * Store the current gamma shape in the optimizer vector at index 1.
 * Nothing is written when getNDim() is 0.
 *
 * @param variables output vector
 */
void RateGamma::setVariables(double *variables) {
    if (getNDim() != 0)
        variables[1] = gamma_shape;
}
/**
 * Fill the optimizer bounds for the gamma shape (stored at index 1).
 *
 * The lower bound comes from the run parameters (min_gamma_shape) and the
 * upper bound is MAX_GAMMA_SHAPE. Nothing is written when getNDim() is 0.
 *
 * @param lower_bound output: lower bounds
 * @param upper_bound output: upper bounds
 * @param bound_check output: per-variable flag, set to false
 */
void RateGamma::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
    if (getNDim() != 0) {
        bound_check[1] = false;
        upper_bound[1] = MAX_GAMMA_SHAPE;
        lower_bound[1] = phylo_tree->params->min_gamma_shape;
    }
}
bool RateFree::getVariables(double *variables) { if (getNDim() == 0) return false; int i; bool changed = false; // Modified by Thomas on 13 May 2015 // --start-- /* double *y = new double[2*ncategory+1]; double *z = y+ncategory+1; // site proportions: y[0..c] <-> (0.0, variables[1..c-1], 1.0) y[0] = 0; y[ncategory] = 1.0; memcpy(y+1, variables+1, (ncategory-1) * sizeof(double)); std::sort(y+1, y+ncategory); // category rates: z[0..c-1] <-> (variables[c..2*c-2], 1.0) memcpy(z, variables+ncategory, (ncategory-1) * sizeof(double)); z[ncategory-1] = 1.0; //std::sort(z, z+ncategory-1); double sum = 0.0; for (i = 0; i < ncategory; i++) { prop[i] = (y[i+1]-y[i]); sum += prop[i] * z[i]; } for (i = 0; i < ncategory; i++) { rates[i] = z[i] / sum; } delete [] y; */ double sum = 1.0; if (optimizing_params == 2) { // proportions for (i = 0; i < ncategory-1; i++) { sum += variables[i+1]; } for (i = 0; i < ncategory-1; i++) { changed |= (prop[i] != variables[i+1] / sum); prop[i] = variables[i+1] / sum; } changed |= (prop[ncategory-1] != 1.0 / sum); prop[ncategory-1] = 1.0 / sum; // added by Thomas on Sept 10, 15 // update the values of rates, in order to // maintain the sum of prop[i]*rates[i] = 1 // sum = 0; // for (i = 0; i < ncategory; i++) { // sum += prop[i] * rates[i]; // } // for (i = 0; i < ncategory; i++) { // rates[i] = rates[i] / sum; // } } else if (optimizing_params == 1) { // rates for (i = 0; i < ncategory-1; i++) { changed |= (rates[i] != variables[i+1]); rates[i] = variables[i+1]; } // added by Thomas on Sept 10, 15 // need to normalize the values of rates, in order to // maintain the sum of prop[i]*rates[i] = 1 // sum = 0; // for (i = 0; i < ncategory; i++) { // sum += prop[i] * rates[i]; // } // for (i = 0; i < ncategory; i++) { // rates[i] = rates[i] / sum; // } } else { // both weights and rates for (i = 0; i < ncategory-1; i++) { sum += variables[i+1]; } for (i = 0; i < ncategory-1; i++) { changed |= (prop[i] != variables[i+1] / sum); prop[i] = 
variables[i+1] / sum; } changed |= (prop[ncategory-1] != 1.0 / sum); prop[ncategory-1] = 1.0 / sum; // then rates sum = prop[ncategory-1]; for (i = 0; i < ncategory-1; i++) { sum += prop[i] * variables[i+ncategory]; } for (i = 0; i < ncategory-1; i++) { changed |= (rates[i] != variables[i+ncategory] / sum); rates[i] = variables[i+ncategory] / sum; } changed |= (rates[ncategory-1] != 1.0 / sum); rates[ncategory-1] = 1.0 / sum; } // --end-- return changed; }
/** optimize parameters. Default is to optimize gamma shape @return the best likelihood */ double RateFree::optimizeParameters(double gradient_epsilon) { int ndim = getNDim(); // return if nothing to be optimized if (ndim == 0) return phylo_tree->computeLikelihood(); if (verbose_mode >= VB_MED) cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl; // TODO: turn off EM algorithm for +ASC model if ((optimize_alg.find("EM") != string::npos && phylo_tree->getModelFactory()->unobserved_ptns.empty())) if (fix_params == 0) return optimizeWithEM(); //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false); double *variables = new double[ndim+1]; double *upper_bound = new double[ndim+1]; double *lower_bound = new double[ndim+1]; bool *bound_check = new bool[ndim+1]; double score; // score = optimizeWeights(); int left = 1, right = 2; if (fix_params == 1) // fix proportions right = 1; if (optimize_alg.find("1-BFGS") != string::npos) { left = 0; right = 0; } // changed to Wi -> Ri by Thomas on Sept 11, 15 for (optimizing_params = right; optimizing_params >= left; optimizing_params--) { ndim = getNDim(); // by BFGS algorithm setVariables(variables); setBounds(lower_bound, upper_bound, bound_check); // if (optimizing_params == 2 && optimize_alg.find("-EM") != string::npos) // score = optimizeWeights(); // else if (optimize_alg.find("BFGS-B") != string::npos) score = -L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_FREE_RATE)); else score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_FREE_RATE)); getVariables(variables); // sort the rates in increasing order if (sorted_rates) quicksort(rates, 0, ncategory-1, prop); phylo_tree->clearAllPartialLH(); score = phylo_tree->computeLikelihood(); } optimizing_params = 0; delete [] bound_check; delete [] lower_bound; delete [] upper_bound; delete [] variables; return score; }