int kh_training_test3() { int* counts_array; cout << "\n\nKH-TRAINING-TEST-3" << endl; string s[] = { ".", ".", ".", ".", ".", "." }; counts_array = new int[9]; clean_int_array(counts_array, 9); for (int i = 0; i < 6; i++) process_structure(&(s[i]), counts_array); cout << endl << "13, 6, 19, 19, 7, 26, 7, 1, 8," << endl; string disp = ""; double probs[3]; double perc[3]; get_production_probabilities(counts_array, probs); get_state_probabilities(counts_array, perc); set_production_probabilities_string(&disp, probs); set_state_probabilities_string(&disp, perc); cout << endl << disp; delete[] counts_array; return 0; }
/** * **Note: Overloaded to accept vectors<string>. * * Trains the Knudson-Hein grammar given a vector of string representing * structures. The calculated probabilities and percentages will fill the * corresponding provided arrays. * * @param set - Every structures in dot-bracket notation in the training set * @param cardinality - Size of the training set. * @param probs - Array to be set with the production probabilities. (Length = 3) * @param probs - Array to be set with the state production percentages. (Length = 3) */ void train_grammar(vector<string>* vec, double* probs, double* perc, bool verbose){ DEBUG = false; int counts[9]; clean_int_array(counts, 9); for (int i = 0; i < vec->size(); i++) process_structure(&((*vec)[i]), counts); get_production_probabilities(counts, probs); get_state_probabilities(counts, perc); if (verbose) { string s = ""; set_production_probabilities_string(&s, probs); set_state_probabilities_string(&s, perc); cout << endl << s << endl; } }
/** * Trains the Knudson-Hein grammar given a string array of structures. The * calculated probabilities and percentages will fill the corresponding * provided arrays. * * @param set - Every structures in dot-bracket notation in the training set * @param cardinality - Size of the training set. * @param probs - Array to be set with the production probabilities. (Length = 3) * @param probs - Array to be set with the state production percentages. (Length = 3) */ void train_grammar(string* set, int cardinality, double* probs, double* perc, bool verbose) { DEBUG = false; int counts[9]; clean_int_array(counts, 9); for (int i = 0; i < cardinality; i++) process_structure(&(set[i]), counts); get_production_probabilities(counts, probs); get_state_probabilities(counts, perc); if (verbose) { string s = ""; set_production_probabilities_string(&s, probs); set_state_probabilities_string(&s, perc); cout << endl << s << endl; } }
/**
 * Writes a report for a set of multi-state models plus per-model fit files.
 *
 * Produces "ensembles_size_<N>.txt" containing, per model: its score, the
 * chi/c1/c2 of each scorer's weighted fit, and each state with its fitted
 * weight, the population-average/variance of that weight, and the overall
 * state frequency. Also writes "multi_state_model_<N>_<i>[_<j>].dat" fit
 * files for the first 10 models.
 *
 * @param ensemble - Models to report on; z-scores are set on each (mutated).
 * @param fps - fps[j][i] = scorer j's weighted-fit parameters for model i.
 */
void EnsembleGenerator::output(Ensemble& ensemble, const Vector<Vector<saxs::WeightedFitParameters> >& fps) const {
  if(ensemble.size() == 0) return;

  // calculate z-score
  Vector<double> scores(ensemble.size());
  for(unsigned int i=0; i<ensemble.size(); i++)
    scores[i] = ensemble[i].get_score();
  std::pair<double, double> average_and_std = get_average_and_stdev(scores);
  for(unsigned int i=0; i<ensemble.size(); i++) {
    // NOTE(review): divides by the stdev — if every model has the same
    // score this is a division by zero; confirm get_average_and_stdev
    // guards against that.
    double zscore = (ensemble[i].get_score()-average_and_std.first) /
                    average_and_std.second;
    ensemble[i].set_zscore(zscore);
  }

  // calculate frequency of each state
  Vector<double> state_prob;
  get_state_probabilities(ensemble, state_prob);

  // calculate weights average and variance (one entry per scorer)
  Vector<Vector<double> > weights_average(scorers_.size()),
                          weights_variance(scorers_.size());
  for(unsigned int i=0; i<scorers_.size(); i++) {
    get_weights_average_and_std(ensemble, fps[i],
                                weights_average[i], weights_variance[i]);
  }

  // output file, named after the number of states per model
  unsigned int number_of_states = ensemble[0].size();
  std::string out_file_name = "ensembles_size_" +
      std::string(boost::lexical_cast<std::string>(number_of_states)) + ".txt";
  std::ofstream s(out_file_name.c_str());
  std::cout << "multi_state_model_size " << ensemble.size ()
            << " number_of_states " << number_of_states << std::endl;

  for(unsigned int i=0; i<ensemble.size(); i++) {
    // output ensemble scores (1-based model index, fixed-point formatting)
    s.setf(std::ios::fixed, std::ios::floatfield);
    s << i+1 << " | " << std::setw(5) << std::setprecision(2)
      << ensemble[i].get_score();
    // << " | " << ensemble[i].get_zscore();

    // output scores for each scorer: chi value and (c1, c2) fit constants
    for(unsigned int j=0; j<scorers_.size(); j++) {
      const saxs::WeightedFitParameters& p = fps[j][i];
      s << " | x" << std::string(boost::lexical_cast<std::string>(j+1)) //scorers_[j]->get_dataset_name() << ": "
        << " " << std::setprecision(2) << p.get_chi()
        << " (" << p.get_c1() << ", " << p.get_c2() << ")";
    }
    s << std::endl;

    // output states and their probabilities
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      s << std::setw(5) << states[k];
      // output weights: fitted weight plus (average, variance) across models
      for(unsigned int j=0; j<scorers_.size(); j++) {
        const saxs::WeightedFitParameters& p = fps[j][i];
        if(p.get_weights().size() > k) {
          s << std::setw(5) << std::setprecision(3) << " | "
            << p.get_weights()[k]
            << " (" << weights_average[j][states[k]] << ", "
            << weights_variance[j][states[k]] << ")";
        }
      }
      // state name comes from the first scorer; frequency from state_prob
      s << " | " << scorers_[0]->get_state_name(states[k])
        << " (" << state_prob[states[k]] << ")" << std::endl;
    }

    // output fit file for the top models only
    if(i<10) { // TODO: add parameter
      for(unsigned int j=0; j<scorers_.size(); j++) {
        std::string fit_file_name = "multi_state_model_" +
            std::string(boost::lexical_cast<std::string>(number_of_states)) + "_" +
            std::string(boost::lexical_cast<std::string>(i+1));
        // NOTE(review): this condition is always true here (the loop only
        // runs when scorers_ is non-empty) — was "> 1" intended, to append
        // the scorer index only when there are multiple scorers?
        if(scorers_.size() > 0) {
          fit_file_name += "_" + std::string(boost::lexical_cast<std::string>(j+1));
        }
        fit_file_name += ".dat";
        scorers_[j]->write_fit_file(ensemble[i], fps[j][i], fit_file_name);
      }
    }
  }
  s.close();
}