Esempio n. 1
0
/**
 * Manual smoke test ("KH-TRAINING-TEST-3"): feeds six single-dot structures
 * through process_structure(), then prints the production and state
 * probabilities derived from the accumulated counts.
 *
 * Nothing is asserted; the output is meant to be inspected by eye.
 *
 * @return Always 0.
 */
int kh_training_test3() {
	cout << "\n\nKH-TRAINING-TEST-3" << endl;

	string s[] = { ".", ".", ".", ".", ".", "." };
	const int num_structures = sizeof(s) / sizeof(s[0]);

	// Fixed-size automatic storage instead of new[]/delete[]: nothing to leak
	// if one of the helpers below throws, and no manual cleanup to forget.
	const int num_counts = 9;
	int counts_array[num_counts];
	clean_int_array(counts_array, num_counts);
	for (int i = 0; i < num_structures; i++)
		process_structure(&(s[i]), counts_array);
	// Reference line printed for visual comparison.
	// NOTE(review): confirm these values really correspond to this input set.
	cout << endl << "13, 6, 19, 19, 7, 26, 7, 1, 8," << endl;

	// Both helpers append their textual report to the same string.
	string disp = "";
	double probs[3];
	double perc[3];
	get_production_probabilities(counts_array, probs);
	get_state_probabilities(counts_array, perc);
	set_production_probabilities_string(&disp, probs);
	set_state_probabilities_string(&disp, perc);
	cout << endl << disp;

	return 0;
}
Esempio n. 2
0
/**
 * **Note: Overloaded to accept vector<string>.
 *
 * Trains the Knudsen-Hein grammar given a vector of strings representing
 * structures. The calculated probabilities and percentages will fill the
 * corresponding provided arrays.
 *
 * Side effect: sets the global DEBUG flag to false.
 *
 * @param vec - Every structure of the training set (dot-bracket notation).
 *              Must not be null.
 * @param probs - Array to be set with the production probabilities. (Length = 3)
 * @param perc - Array to be set with the state production percentages. (Length = 3)
 * @param verbose - When true, the textual report of both result arrays is
 *                  printed to standard output.
 */
void train_grammar(vector<string>* vec, double* probs, double* perc, bool verbose){
	DEBUG = false;
	int counts[9];
	clean_int_array(counts, 9);
	// Index with the container's own size type to avoid a signed/unsigned
	// comparison against vec->size().
	for (vector<string>::size_type i = 0; i < vec->size(); i++)
		process_structure(&((*vec)[i]), counts);
	get_production_probabilities(counts, probs);
	get_state_probabilities(counts, perc);
	if (verbose) {
		string s = "";
		set_production_probabilities_string(&s, probs);
		set_state_probabilities_string(&s, perc);
		cout << endl << s << endl;
	}
}
Esempio n. 3
0
/**
 * Trains the Knudsen-Hein grammar from a plain array of structure strings.
 * On return, the two caller-provided arrays hold the computed production
 * probabilities and state production percentages.
 *
 * Side effect: sets the global DEBUG flag to false.
 *
 * @param set - The training set: structures in dot-bracket notation.
 * @param cardinality - Number of entries of `set` to process.
 * @param probs - Receives the production probabilities. (Length = 3)
 * @param perc - Receives the state production percentages. (Length = 3)
 * @param verbose - When true, the textual report of both result arrays is
 *                  printed to standard output.
 */
void train_grammar(string* set, int cardinality, double* probs, double* perc,
		bool verbose) {
	DEBUG = false;

	// Accumulate the counts over every structure of the training set.
	int tallies[9];
	clean_int_array(tallies, 9);
	int idx = 0;
	while (idx < cardinality) {
		process_structure(set + idx, tallies);
		++idx;
	}

	// Turn the raw counts into the two result arrays.
	get_production_probabilities(tallies, probs);
	get_state_probabilities(tallies, perc);

	if (!verbose)
		return;

	// Both helpers append to the same report string.
	string report;
	set_production_probabilities_string(&report, probs);
	set_state_probabilities_string(&report, perc);
	cout << endl << report << endl;
}
Esempio n. 4
0
/**
 * Writes a report for the given ensemble.
 *
 * Steps, in order:
 *  - stores a z-score on every ensemble member (computed from all scores);
 *  - writes "ensembles_size_<N>.txt", where N is ensemble[0].size(), with one
 *    header line per member (score and, per scorer, chi/c1/c2) followed by
 *    one line per state (state id, per-scorer weight with its average and
 *    variance, state name and state probability);
 *  - prints a one-line summary to standard output;
 *  - for the first 10 members, asks every scorer to write a fit file named
 *    "multi_state_model_<N>_<member>_<scorer>.dat".
 *
 * Does nothing when the ensemble is empty.
 *
 * @param ensemble Members to report; their z-scores are updated in place.
 * @param fps Fit parameters indexed as fps[scorer][member]; must provide an
 *            entry for every scorer/member pair that is accessed.
 */
void EnsembleGenerator::output(Ensemble& ensemble,
       const Vector<Vector<saxs::WeightedFitParameters> >& fps) const {

  if(ensemble.size() == 0) return;

  // calculate z-score
  Vector<double> scores(ensemble.size());
  for(unsigned int i=0; i<ensemble.size(); i++) scores[i] = ensemble[i].get_score();
  std::pair<double, double> average_and_std = get_average_and_stdev(scores);
  for(unsigned int i=0; i<ensemble.size(); i++) {
    // A zero spread (single member, or all scores equal) would make the
    // division produce NaN/inf; report a z-score of 0 in that case.
    double zscore = 0.0;
    if(average_and_std.second != 0.0) {
      zscore = (ensemble[i].get_score()-average_and_std.first) /
        average_and_std.second;
    }
    ensemble[i].set_zscore(zscore);
  }

  // calculate frequency of each state
  Vector<double> state_prob;
  get_state_probabilities(ensemble, state_prob);

  // calculate weights average and variance
  Vector<Vector<double> > weights_average(scorers_.size()),
    weights_variance(scorers_.size());
  for(unsigned int i=0; i<scorers_.size(); i++) {
    get_weights_average_and_std(ensemble, fps[i], weights_average[i],
                                weights_variance[i]);
  }

  // output file
  unsigned int number_of_states = ensemble[0].size();
  // converted once; reused for the summary file name and every fit file name
  const std::string number_of_states_str =
    std::string(boost::lexical_cast<std::string>(number_of_states));
  std::string out_file_name = "ensembles_size_" + number_of_states_str + ".txt";
  std::ofstream s(out_file_name.c_str());
  std::cout << "multi_state_model_size " << ensemble.size ()
            << " number_of_states " << number_of_states << std::endl;

  for(unsigned int i=0; i<ensemble.size(); i++) {
    // output ensemble scores
    s.setf(std::ios::fixed, std::ios::floatfield);
    s << i+1 << " | " << std::setw(5) << std::setprecision(2)
      << ensemble[i].get_score(); // << " | " << ensemble[i].get_zscore();

    // output scores for each scorer
    for(unsigned int j=0; j<scorers_.size(); j++) {
      const saxs::WeightedFitParameters& p = fps[j][i];
      s << " | x" << std::string(boost::lexical_cast<std::string>(j+1))
        //scorers_[j]->get_dataset_name() << ": "
        << " " << std::setprecision(2) << p.get_chi()
        << " (" << p.get_c1() << ", " << p.get_c2() << ")";
    }
    // '\n' rather than std::endl: no need to flush the file after every line,
    // the stream is closed at the end of the function.
    s << '\n';

    // output states and their probabilities
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      s << std::setw(5) << states[k];

      // output weights
      for(unsigned int j=0; j<scorers_.size(); j++) {
        const saxs::WeightedFitParameters& p = fps[j][i];
        // a scorer may report fewer weights than there are states
        if(p.get_weights().size() > k) {
          s << std::setw(5) << std::setprecision(3) << " | "
            << p.get_weights()[k] << " ("
            << weights_average[j][states[k]] << ", "
            << weights_variance[j][states[k]] << ")";
        }
      }
      // state names are taken from the first scorer, so at least one scorer
      // must be registered
      s << " | "  << scorers_[0]->get_state_name(states[k])
        << " (" << state_prob[states[k]] << ")" << '\n';
    }

    // output fit file
    if(i<10) { // TODO: add parameter
      for(unsigned int j=0; j<scorers_.size(); j++) {
        std::string fit_file_name = "multi_state_model_" +
          number_of_states_str + "_" +
          std::string(boost::lexical_cast<std::string>(i+1));
        // NOTE(review): always true inside this loop, so the scorer index is
        // always appended; possibly "> 1" was intended. Left unchanged to keep
        // the existing file names stable.
        if(scorers_.size() > 0) {
          fit_file_name +=  "_" + std::string(boost::lexical_cast<std::string>(j+1));
        }
        fit_file_name += ".dat";
        scorers_[j]->write_fit_file(ensemble[i], fps[j][i], fit_file_name);
      }
    }
  }
  s.close();
}