Exemplo n.º 1
0
/**
	Pack the current proportions and/or rates into the optimizer's variable
	array. Slots are filled starting at index 1; variables[0] is never written.
	What gets packed depends on optimizing_params:
	  1     -> rates only, copied unscaled into slots 1..ncategory-1
	  2     -> proportions only, each divided by the last category's proportion
	  other -> proportions as for 2, followed by the rates divided by the last
	           category's rate in slots ncategory..2*ncategory-2
	@param variables (OUT) array receiving the packed values
*/
void RateFree::setVariables(double *variables) {
	if (getNDim() == 0)
		return;

	const int last = ncategory - 1;
	const bool rates_only = (optimizing_params == 1);
	const bool props_only = (optimizing_params == 2);
	int k;

	if (rates_only) {
		// raw rates of all but the last category
		for (k = 1; k <= last; k++)
			variables[k] = rates[k-1];
		return;
	}

	// proportions, expressed relative to the last category
	for (k = 1; k <= last; k++)
		variables[k] = prop[k-1] / prop[last];

	if (props_only)
		return;

	// joint mode: rates follow the proportions, relative to the last rate
	for (k = 0; k < last; k++)
		variables[ncategory + k] = rates[k] / rates[last];
}
Exemplo n.º 2
0
/**
	Read the gamma shape back from the optimizer's variable array (slot 1)
	and recompute the category rates when the value actually changed.
	@param variables array holding the shape at index 1
	@return true if gamma_shape differs from its previous value, false
		otherwise (also false when there is nothing to optimize)
*/
bool RateGamma::getVariables(double *variables) {
	if (getNDim() == 0)
		return false;

	const double new_shape = variables[1];
	const bool changed = !(gamma_shape == new_shape);

	// the stored value is always overwritten, even when it compares equal
	gamma_shape = new_shape;
	if (!changed)
		return false;

	computeRates();
	return true;
}
Exemplo n.º 3
0
/*
 * Copy the strides reported by PyArray_STRIDES for `arr` into `strides`,
 * narrowing each npy_intp value to int.
 *
 * arr      array object handed to PyArray_STRIDES / getNDim
 * strides  (OUT) receives getNDim(arr) entries, so it must be at least
 *          that large
 *
 * Always returns 0.
 */
int getStrides( void* arr, int* strides ) {
	npy_intp* np_strides = PyArray_STRIDES( arr );
	/* the dimension count is loop-invariant, so query it only once */
	int i, nd = getNDim( arr );
	for( i=0; i<nd; i++ ) {
		strides[i] = (int)np_strides[i];
	}
	return 0;
}
Exemplo n.º 4
0
/*
 * Return the product of all extents reported by PyArray_DIMS for `arr`,
 * i.e. the total number of elements. Each extent is narrowed to int
 * before multiplying. An array with zero dimensions yields 1.
 */
int getSize( void* arr ) {
	npy_intp* extents = PyArray_DIMS( arr );
	int ndim = getNDim( arr );
	int total = 1;
	int axis = 0;

	while( axis < ndim ) {
		total *= (int)extents[axis];
		axis++;
	}
	return total;
}
Exemplo n.º 5
0
/*
 * Copy the extents reported by PyArray_DIMS for `arr` into `dims`,
 * narrowing each npy_intp value to int.
 *
 * arr   array object handed to PyArray_DIMS / getNDim
 * dims  (OUT) receives getNDim(arr) entries, so it must be at least
 *       that large
 *
 * Always returns 0.
 */
int getDims( void* arr, int* dims ) {
	npy_intp* np_dims = PyArray_DIMS( arr );
	/* the dimension count is loop-invariant, so query it only once */
	int i, nd = getNDim( arr );
	for( i=0; i<nd; i++ ) {
		dims[i] = (int)np_dims[i];
	}
	return 0;
}
Exemplo n.º 6
0
double RateGammaInvar::optimizeParameters(double gradient_epsilon) {

	int ndim = getNDim();

	// return if nothing to be optimized
	if (ndim == 0)
		return phylo_tree->computeLikelihood();

    if (verbose_mode >= VB_MED)
        cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;

	if (optimize_alg.find("EM_RR") != string::npos) {
        return randomRestartOptimization(gradient_epsilon);
    } else if (optimize_alg.find("Brent") != string::npos || phylo_tree->aln->frac_const_sites == 0.0 || isFixPInvar() || isFixGammaShape()) {
		double lh = phylo_tree->computeLikelihood();
		cur_optimize = 0;
		double gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
		ASSERT(gamma_lh >= lh-0.1);
		cur_optimize = 1;
		double invar_lh = -DBL_MAX;
        invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
		ASSERT(invar_lh >= gamma_lh-0.1);
        cur_optimize = 0;
        return invar_lh;
	} else if (optimize_alg.find("EM") != string::npos) {
        return optimizeWithEM(gradient_epsilon);
    } else if (optimize_alg.find("BFGS") != string::npos) {
        //if (freq_type == FREQ_ESTIMATE) scaleStateFreq(false);
        double *variables = new double[ndim+1];
        double *upper_bound = new double[ndim+1];
        double *lower_bound = new double[ndim+1];
        bool *bound_check = new bool[ndim+1];
        double score;

        // by BFGS algorithm
        setVariables(variables);
        setBounds(lower_bound, upper_bound, bound_check);

        score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_GAMMA_SHAPE));

        getVariables(variables);

        phylo_tree->clearAllPartialLH();
        score = phylo_tree->computeLikelihood();

        delete [] bound_check;
        delete [] lower_bound;
        delete [] upper_bound;
        delete [] variables;

        return score;
    } else {
        string errMsg = "Unknown optimization algorithm: " + optimize_alg;
        outError(errMsg.c_str());
        return 0.0;
    }
}
Exemplo n.º 7
0
/**
	Optimize all free parameters jointly with minimizeMultiDimen().
	Every variable is bounded to [1e-4, 100]; the last ncategory-1
	variables get their upper bound lowered to 1.
	@param epsilon requested tolerance; values below 1e-6 are raised to 1e-6
	@return the negated value returned by minimizeMultiDimen(), or 0.0 when
		there is nothing to optimize
*/
double NGSRateCat::optimizeParameters(double epsilon) {
    int ndim = getNDim();

    // nothing to optimize
    if (ndim == 0) {
        return 0.0;
    }

    cout << "Optimizing " << name << " model parameters..." << endl;

    // The arrays hold ndim+1 entries; the loops below fill slots 1..ndim.
    double *variables = new double[ndim+1];
    double *upper_bound = new double[ndim+1];
    double *lower_bound = new double[ndim+1];
    bool *bound_check = new bool[ndim+1];

    setVariables(variables);

    // default box constraints for every variable
    int slot = 1;
    while (slot <= ndim) {
        lower_bound[slot] = 1e-4;
        upper_bound[slot] = 100.0;
        bound_check[slot] = false;
        slot++;
    }

    // the trailing ncategory-1 variables are capped at 1
    for (slot = ndim-ncategory+2; slot <= ndim; slot++) {
        upper_bound[slot] = 1.0;
    }

    const double score = -minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(epsilon, 1e-6));

    getVariables(variables);

    delete [] bound_check;
    delete [] lower_bound;
    delete [] upper_bound;
    delete [] variables;

    return score;
}
Exemplo n.º 8
0
/**
	Fill in the box constraints for the variables being optimized.
	The layout depends on optimizing_params:
	  1     -> slots 1..ncategory-1 get the rate bounds
	  2     -> slots 1..ncategory-1 get the proportion bounds
	  other -> slots 1..ncategory-1 get the proportion bounds and
	           slots ncategory..2*ncategory-2 get the rate bounds
	bound_check is set to false for every slot that is filled.
	@param lower_bound (OUT) lower bounds
	@param upper_bound (OUT) upper bounds
	@param bound_check (OUT) per-variable flag, always cleared here
*/
void RateFree::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
	if (getNDim() == 0)
		return;

	const bool rates_only = (optimizing_params == 1);
	const bool props_only = (optimizing_params == 2);
	int k;

	if (rates_only) {
		for (k = 1; k < ncategory; k++) {
			lower_bound[k] = MIN_FREE_RATE;
			upper_bound[k] = MAX_FREE_RATE;
			bound_check[k] = false;
		}
		return;
	}

	// proportions come first in both remaining modes
	for (k = 1; k < ncategory; k++) {
		lower_bound[k] = MIN_FREE_RATE_PROP;
		upper_bound[k] = MAX_FREE_RATE_PROP;
		bound_check[k] = false;
	}

	if (props_only)
		return;

	// joint mode: the rates occupy the slots right after the proportions
	for (k = ncategory; k <= 2*ncategory-2; k++) {
		lower_bound[k] = MIN_FREE_RATE;
		upper_bound[k] = MAX_FREE_RATE;
		bound_check[k] = false;
	}
}
Exemplo n.º 9
0
/**
	Set the box constraints of the single variable (slot 1): it ranges from
	MIN_PINVAR up to the alignment's frac_const_sites, with bound_check
	cleared. Does nothing when there is no variable to optimize.
	@param lower_bound (OUT) lower bounds
	@param upper_bound (OUT) upper bounds
	@param bound_check (OUT) per-variable flag
*/
void RateInvar::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
	if (getNDim() != 0) {
		const int slot = 1;
		lower_bound[slot] = MIN_PINVAR;
		upper_bound[slot] = phylo_tree->aln->frac_const_sites;
		bound_check[slot] = false;
	}
}
Exemplo n.º 10
0
/**
	Store the current gamma shape in slot 1 of the optimizer's variable
	array. Does nothing when there is no variable to optimize.
	@param variables (OUT) array receiving the shape at index 1
*/
void RateGamma::setVariables(double *variables) {
	if (getNDim() != 0) {
		variables[1] = gamma_shape;
	}
}
Exemplo n.º 11
0
/**
	Set the box constraints of the single variable (slot 1): it ranges from
	the configured min_gamma_shape up to MAX_GAMMA_SHAPE, with bound_check
	cleared. Does nothing when there is no variable to optimize.
	@param lower_bound (OUT) lower bounds
	@param upper_bound (OUT) upper bounds
	@param bound_check (OUT) per-variable flag
*/
void RateGamma::setBounds(double *lower_bound, double *upper_bound, bool *bound_check) {
	if (getNDim() != 0) {
		const int slot = 1;
		lower_bound[slot] = phylo_tree->params->min_gamma_shape;
		upper_bound[slot] = MAX_GAMMA_SHAPE;
		bound_check[slot] = false;
	}
}
Exemplo n.º 12
0
/**
	Unpack the optimizer's variable array back into prop[] and rates[].
	The layout depends on optimizing_params:
	  1     -> slots 1..ncategory-1 are copied verbatim into rates[]; the
	           last rate is left untouched and nothing is renormalized
	  2     -> slots 1..ncategory-1 are proportions relative to the last
	           category (implicit value 1); they are rescaled to sum to 1
	  other -> proportions as for 2, then slots ncategory..2*ncategory-2
	           are rates relative to the last category (implicit value 1),
	           rescaled so that the sum of prop[i]*rates[i] equals 1
	@param variables array filled by the optimizer (slot 0 is not read)
	@return true if any stored proportion or rate differs from its
		previous value; false otherwise or when there is nothing to optimize
*/
bool RateFree::getVariables(double *variables) {
	if (getNDim() == 0)
		return false;

	const int last = ncategory - 1;
	const bool rates_only = (optimizing_params == 1);
	const bool props_only = (optimizing_params == 2);
	bool changed = false;
	int k;

	if (rates_only) {
		for (k = 0; k < last; k++) {
			const double r = variables[k+1];
			if (rates[k] != r)
				changed = true;
			rates[k] = r;
		}
		return changed;
	}

	// Proportions: the last category contributes the implicit 1.0.
	double total = 1.0;
	for (k = 1; k <= last; k++)
		total += variables[k];

	for (k = 0; k < last; k++) {
		const double p = variables[k+1] / total;
		if (prop[k] != p)
			changed = true;
		prop[k] = p;
	}
	const double p_last = 1.0 / total;
	if (prop[last] != p_last)
		changed = true;
	prop[last] = p_last;

	if (props_only)
		return changed;

	// Rates: weighted mean of the relative rates, using the proportions
	// just updated above; the last category's relative rate is 1.
	double mean = prop[last];
	for (k = 0; k < last; k++)
		mean += prop[k] * variables[k+ncategory];

	for (k = 0; k < last; k++) {
		const double r = variables[k+ncategory] / mean;
		if (rates[k] != r)
			changed = true;
		rates[k] = r;
	}
	const double r_last = 1.0 / mean;
	if (rates[last] != r_last)
		changed = true;
	rates[last] = r_last;

	return changed;
}
Exemplo n.º 13
0
/**
	Optimize the proportions and rates of the model.
	If optimize_alg contains "EM", no unobserved patterns are present and
	fix_params is 0, the work is delegated to optimizeWithEM(). Otherwise
	the parameters are optimized in stages, with optimizing_params counting
	down through the stages:
	  default         -> stage 2 (proportions), then stage 1 (rates)
	  fix_params == 1 -> stage 1 only
	  "1-BFGS"        -> stage 0 only (proportions and rates jointly)
	Each stage uses L_BFGS_B() when optimize_alg contains "BFGS-B" and
	minimizeMultiDimen() otherwise. optimizing_params is reset to 0 at the end.
	@param gradient_epsilon requested tolerance; the larger of this value
		and TOL_FREE_RATE is passed to the optimizer
	@return the tree likelihood computed after the last stage, or the
		current likelihood if there is nothing to optimize
*/
double RateFree::optimizeParameters(double gradient_epsilon) {

	int ndim = getNDim();

	// nothing to optimize: just report the current likelihood
	if (ndim == 0) {
		return phylo_tree->computeLikelihood();
	}

	if (verbose_mode >= VB_MED) {
		cout << "Optimizing " << name << " model parameters by " << optimize_alg << " algorithm..." << endl;
	}

	// TODO: turn off EM algorithm for +ASC model
	if (optimize_alg.find("EM") != string::npos
		&& phylo_tree->getModelFactory()->unobserved_ptns.empty()
		&& fix_params == 0) {
		return optimizeWithEM();
	}

	// Sized with the dimension count obtained before the staged loop starts.
	double *variables = new double[ndim+1];
	double *upper_bound = new double[ndim+1];
	double *lower_bound = new double[ndim+1];
	bool *bound_check = new bool[ndim+1];
	double score;

	// Stages run from first_stage down to last_stage.
	int first_stage = (fix_params == 1) ? 1 : 2; // proportions fixed: skip stage 2
	int last_stage = 1;
	if (optimize_alg.find("1-BFGS") != string::npos) {
		first_stage = 0;
		last_stage = 0;
	}

	optimizing_params = first_stage;
	while (optimizing_params >= last_stage) {
		// the number of free variables is queried again for every stage
		ndim = getNDim();
		setVariables(variables);
		setBounds(lower_bound, upper_bound, bound_check);

		// The optimizer's return value is not used: the likelihood is
		// recomputed below after the values have been read back.
		if (optimize_alg.find("BFGS-B") != string::npos) {
			L_BFGS_B(ndim, variables+1, lower_bound+1, upper_bound+1, max(gradient_epsilon, TOL_FREE_RATE));
		} else {
			minimizeMultiDimen(variables, ndim, lower_bound, upper_bound, bound_check, max(gradient_epsilon, TOL_FREE_RATE));
		}

		getVariables(variables);

		// keep the rates in increasing order if requested
		if (sorted_rates) {
			quicksort(rates, 0, ncategory-1, prop);
		}

		phylo_tree->clearAllPartialLH();
		score = phylo_tree->computeLikelihood();

		optimizing_params--;
	}
	optimizing_params = 0;

	delete [] bound_check;
	delete [] lower_bound;
	delete [] upper_bound;
	delete [] variables;

	return score;
}