Esempio n. 1
0
/**
 * Classifies every row of the test set with the current model.
 *
 * For each row, the label whose model->getProbability() value is largest is
 * selected; when several labels share the largest value, the one that comes
 * first in model->getLabels() wins.
 *
 * The running maximum is seeded with the score of the first label rather than
 * with 0, so the arg-max is still found when every score is zero or negative.
 *
 * @return one predicted label per test-set row, in test-set order. If the
 *         model reports no labels at all, each entry is an empty string.
 */
std::vector<std::string> NaiveBayes::eval()
{
	std::vector<std::string> results;

	// Fetched once instead of once per row.
	// NOTE(review): assumes the label list does not change while rows are
	// being scored -- confirm against the model implementation.
	StrVector labels = this->model->getLabels();

	for (Data::iterator rowIt = testSet->begin(); rowIt != testSet->end(); ++rowIt)
	{
		DataRow &row = *rowIt;

		// Without any label there is nothing to score; keep the output
		// aligned with the rows instead of indexing an empty vector.
		if (labels.empty())
		{
			results.push_back(std::string());
			continue;
		}

		// Seed the search with the first label and its actual score.
		StrVector::iterator labelIt = labels.begin();
		std::string maxLabel = *labelIt;
		float maxProb = this->model->getProbability(&row, *labelIt);

		for (++labelIt; labelIt != labels.end(); ++labelIt)
		{
			float prob = this->model->getProbability(&row, *labelIt);
			// Strict comparison: an earlier label keeps its place on ties.
			if (prob > maxProb)
			{
				maxProb = prob;
				maxLabel = *labelIt;
			}
		}
		results.push_back(maxLabel);
	}
	return results;
}
Esempio n. 2
0
void reportNGSAnalysis(const char *file_name, Params &params, NGSAlignment &aln, NGSTree &tree,
                       DoubleMatrix &rate_info, StrVector &rate_name) {
    ofstream out(file_name);
    out.setf(ios::fixed,ios::floatfield);

    int i, j, k;


    double *rate_param = new double[aln.num_states * aln.num_states];
    double *rate_matrix = new double[aln.num_states * aln.num_states];

    out << "Input file: " << params.ngs_file << endl;
    out << "Model of evolution: " << tree.getModel()->name << endl << endl;

    out << "Substitution process assuming one homogeneous model among all positions:" << endl;

    out << "Rate parameters: " << endl;

    tree.getModel()->getRateMatrix(rate_param);

    if (tree.getModel()->name == "UNREST") {
        for (i = 0, k=0; i < aln.num_states; i++)
            for (j = 0; j < aln.num_states; j++)
                if (i != j)
                    rate_matrix[i*aln.num_states+j] = rate_param[k++];
    } else {
        for (i = 0, k=0; i < aln.num_states-1; i++)
            for (j = i+1; j < aln.num_states; j++, k++)
                rate_matrix[i*aln.num_states+j] = rate_matrix[j*aln.num_states+i] = rate_param[k];
    }

    for (i = 0; i < aln.num_states; i++) {
        for (j = 0; j < aln.num_states; j++) {
            if (j > 0) out << " \t";
            if (j != i) out << rate_matrix[i*aln.num_states+j];
            else out << "-";
        }
        out << endl;
    }
    out << endl;
    out << "State frequencies: ";
    switch (tree.getModel()->getFreqType()) {
    case FREQ_EMPIRICAL:
        out << "(empirical counts from alignment)" << endl;
        break;
    case FREQ_ESTIMATE:
        out << "(estimated with maximum likelihood)" << endl;
        break;
    case FREQ_USER_DEFINED:
        out << "(user-defined)" << endl;
        break;
    case FREQ_EQUAL:
        out << "(equal frequencies)" << endl;
        break;
    default:
        break;
    }

    double *state_freq = new double[aln.num_states];
    tree.getModel()->getStateFrequency(state_freq);

    for (i = 0; i < aln.num_states; i++) out << state_freq[i] << " \t";
    out << endl << endl;

    out << "Q matrix can be obtained by multiplying rate parameters with state frequencies" << endl << endl;

    double *q_mat = new double[tree.aln->num_states * tree.aln->num_states];
    tree.getModel()->getQMatrix(q_mat);

    for (i = 0, k = 0; i < tree.aln->num_states; i++) {
        for (j = 0; j < tree.aln->num_states; j++, k++)
            out << "  " << q_mat[k];
        out << endl;
    }

    delete [] q_mat;

    out << endl;

    out << "Log-likelihood: " << tree.computeLikelihood() << endl << endl;

    out << "Inferred posisiton-specific rates under one model or position-specific model: " << endl;

    out << "Position\tSeq_error";
    for (StrVector::iterator it = rate_name.begin(); it != rate_name.end(); it++)
        out << "\t" << (*it);
    out << endl;
    for (i = 0; i < aln.ncategory; i++) {
        out << i+1 << '\t' << tree.getRate()->getRate(i);
        DoubleVector *rate_vec = &rate_info[i];
        for (DoubleVector::iterator dit = rate_vec->begin(); dit != rate_vec->end(); dit++)
            out << "\t" << *dit;
        out << endl;
    }
    out.close();
    cout << endl << "Results written to: " << file_name << endl << endl;
    delete [] state_freq;
    delete [] rate_matrix;
    delete [] rate_param;
}