std::vector<std::string> NaiveBayes::eval() { std::vector<std::string> results; for (Data::iterator rowIt = testSet->begin(); rowIt != testSet->end() ; rowIt++) { DataRow &row = *rowIt; StrVector labels = this->model->getLabels(); std::string maxLabel = labels[0]; float maxProb = 0.0f; for (StrVector::iterator labelIt = labels.begin(); labelIt != labels.end(); labelIt++) { float prob = this->model->getProbability(&row, *labelIt); if (prob > maxProb) { maxProb = prob; maxLabel = *labelIt; } } results.push_back(maxLabel); } return results; }
void reportNGSAnalysis(const char *file_name, Params ¶ms, NGSAlignment &aln, NGSTree &tree, DoubleMatrix &rate_info, StrVector &rate_name) { ofstream out(file_name); out.setf(ios::fixed,ios::floatfield); int i, j, k; double *rate_param = new double[aln.num_states * aln.num_states]; double *rate_matrix = new double[aln.num_states * aln.num_states]; out << "Input file: " << params.ngs_file << endl; out << "Model of evolution: " << tree.getModel()->name << endl << endl; out << "Substitution process assuming one homogeneous model among all positions:" << endl; out << "Rate parameters: " << endl; tree.getModel()->getRateMatrix(rate_param); if (tree.getModel()->name == "UNREST") { for (i = 0, k=0; i < aln.num_states; i++) for (j = 0; j < aln.num_states; j++) if (i != j) rate_matrix[i*aln.num_states+j] = rate_param[k++]; } else { for (i = 0, k=0; i < aln.num_states-1; i++) for (j = i+1; j < aln.num_states; j++, k++) rate_matrix[i*aln.num_states+j] = rate_matrix[j*aln.num_states+i] = rate_param[k]; } for (i = 0; i < aln.num_states; i++) { for (j = 0; j < aln.num_states; j++) { if (j > 0) out << " \t"; if (j != i) out << rate_matrix[i*aln.num_states+j]; else out << "-"; } out << endl; } out << endl; out << "State frequencies: "; switch (tree.getModel()->getFreqType()) { case FREQ_EMPIRICAL: out << "(empirical counts from alignment)" << endl; break; case FREQ_ESTIMATE: out << "(estimated with maximum likelihood)" << endl; break; case FREQ_USER_DEFINED: out << "(user-defined)" << endl; break; case FREQ_EQUAL: out << "(equal frequencies)" << endl; break; default: break; } double *state_freq = new double[aln.num_states]; tree.getModel()->getStateFrequency(state_freq); for (i = 0; i < aln.num_states; i++) out << state_freq[i] << " \t"; out << endl << endl; out << "Q matrix can be obtained by multiplying rate parameters with state frequencies" << endl << endl; double *q_mat = new double[tree.aln->num_states * tree.aln->num_states]; tree.getModel()->getQMatrix(q_mat); for (i = 0, k = 0; i < tree.aln->num_states; i++) { for (j = 0; j < tree.aln->num_states; j++, k++) out << " " << q_mat[k]; out << endl; } delete [] q_mat; out << endl; out << "Log-likelihood: " << tree.computeLikelihood() << endl << endl; out << "Inferred posisiton-specific rates under one model or position-specific model: " << endl; out << "Position\tSeq_error"; for (StrVector::iterator it = rate_name.begin(); it != rate_name.end(); it++) out << "\t" << (*it); out << endl; for (i = 0; i < aln.ncategory; i++) { out << i+1 << '\t' << tree.getRate()->getRate(i); DoubleVector *rate_vec = &rate_info[i]; for (DoubleVector::iterator dit = rate_vec->begin(); dit != rate_vec->end(); dit++) out << "\t" << *dit; out << endl; } out.close(); cout << endl << "Results written to: " << file_name << endl << endl; delete [] state_freq; delete [] rate_matrix; delete [] rate_param; }