Exemple #1
0
void EnsembleGenerator::get_state_probabilities(const Ensemble& ensemble,
                                         Vector<double>& state_prob) const {

  Vector<unsigned int> states_counters(N_, 0);
  unsigned int total_state_num = 0;

  // count the number of occurences of each state in MultiStateModels
  // in the entire Ensemble (states_counters)
  for(unsigned int i=0; i<ensemble.size(); i++) {
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      states_counters[states[k]]++;
      total_state_num++;
    }
  }

  // compute state probs and weight variance
  state_prob.insert(state_prob.begin(), N_, 0.0);
  // compute the probability of each state to appear in the MultiStateModels
  // (state_prob), it's average weight across models (weight_average)
  // and variance (weight_variance)
  for(unsigned int i=0; i<N_; i++) {
    if(states_counters[i] > 0) {
      if(states_counters[i] == 1) {
        state_prob[i] = 1.0/total_state_num;
      } else {
        state_prob[i] = states_counters[i]/(double)ensemble.size();
      }
    }
  }
}
Exemple #2
0
void EnsembleGenerator::add_one_state(const Ensemble& init_ensemble,
                                      Ensemble& new_ensemble) {

  std::priority_queue<boost::tuple<double, int, int>,
                      Vector<boost::tuple<double, int, int> >,
                      Comparator> bestK;

  // iterate over all init MultiStateModels and try to add a new state to each
  for(unsigned int i=0; i<init_ensemble.size(); i++) {
    unsigned int first_to_search = init_ensemble[i].get_last_state()+1;
    if(first_to_search<N_) {

      if(i>0 && i%100==0 && !bestK.empty()) {
        double curr_bestK_score = boost::get<0>(bestK.top());
        std::cout << "Extending ensemble: " << i << " out of "
                  << init_ensemble.size() << " last best "
                  << curr_bestK_score << std::endl;
      }

      MultiStateModel new_model(init_ensemble[i]);
      new_model.add_state(first_to_search);

      // try all possible additions of a new state
      for(unsigned int j=first_to_search; j<N_; j++) {
        new_model.replace_last_state(j);
        double curr_score = get_score(new_model);
        if(curr_score < 0.0) continue; // invalid model
        // add to bestK
        if(bestK.size() <= K_ || curr_score < boost::get<0>(bestK.top())) {
          bestK.push(boost::make_tuple(curr_score, i, j));
          if(bestK.size() > K_) bestK.pop();
        }
      }
    }
  }

  // save best scoring
  new_ensemble.assign(bestK.size(), MultiStateModel(0));
  int index = bestK.size()-1;
  // generate bestK new MultiStateModels
  while(!bestK.empty()) {
    double score;
    int ensemble_index, new_state_index;
    boost::tie(score, ensemble_index, new_state_index) = bestK.top();
    MultiStateModel new_model(init_ensemble[ensemble_index]);
    new_model.add_state(new_state_index);
    new_model.set_score(score);
    new_ensemble[index] = new_model;
    index--;
    bestK.pop();
  }
}
Exemple #3
0
// Returns the median of the ensemble members that differ from Global::MV.
// If no such member exists, Global::MV is returned. For an even number of
// values the result is the mean of the two middle values.
float MeasureEnsembleMedian::measureCore(const Ensemble& iEnsemble) const {
   // Keep only the members that are not flagged as missing
   std::vector<float> valid;
   for(int m = 0; m < iEnsemble.size(); m++) {
      if(iEnsemble[m] == Global::MV)
         continue;
      valid.push_back(iEnsemble[m]);
   }

   if(valid.empty())
      return Global::MV;

   std::sort(valid.begin(), valid.end());
   const unsigned int count = valid.size();
   const unsigned int mid = count / 2;

   // Odd size: a single middle element
   if(count % 2 != 0)
      return valid[mid];

   // Even size: average the two elements around the middle
   return (valid[mid - 1] + valid[mid])/2;
}
Exemple #4
0
// Runs check() on every member of iEnsemble, wrapped in a Value together
// with the ensemble's date, init, offset, location and variable. Members
// that fail the check are overwritten with Global::MV.
// Returns true if at least one member was overwritten.
bool Qc::qc(Ensemble& iEnsemble) const {
   bool modified = false;
   for(int m = 0; m < iEnsemble.size(); m++) {
      const bool passed = check(Value(iEnsemble[m], iEnsemble.getDate(), iEnsemble.getInit(), iEnsemble.getOffset(), iEnsemble.getLocation(), iEnsemble.getVariable()));
      if(passed)
         continue;
      iEnsemble[m] = Global::MV;
      modified = true;
   }
   return modified;
}
Exemple #5
0
// Blends every valid ensemble member with a climatological value:
//    member = member * (1 - w) + clim * w,   with w = iParameters[0].
// clim is iParameters[1] when mComputeClim is set (exactly two parameters
// are then asserted), otherwise it is fetched from mData for the ensemble's
// date, init, offset, location and variable.
// Nothing is changed if clim or either weight fails Global::isValid; members
// that fail Global::isValid are left as they are.
void CorrectorClim::correctCore(const Parameters& iParameters, Ensemble& iEnsemble) const {
   const float weightClim = iParameters[0];
   const float weightEns = 1 - weightClim;

   float clim;
   if(!mComputeClim) {
      clim = mData.getClim(iEnsemble.getDate(), iEnsemble.getInit(), iEnsemble.getOffset(),
            iEnsemble.getLocation(), iEnsemble.getVariable());
   }
   else {
      assert(iParameters.size() == 2);
      clim = iParameters[1];
   }

   // Without a usable climatology and weights there is nothing to blend
   if(!Global::isValid(clim) || !Global::isValid(weightEns) || !Global::isValid(weightClim))
      return;

   for(int m = 0; m < iEnsemble.size(); m++) {
      const float value = iEnsemble[m];
      if(!Global::isValid(value))
         continue;
      iEnsemble[m] = value * weightEns + clim * weightClim;
   }
}
Exemple #6
0
void CorrectorRound::correctCore(const Parameters& iParameters, Ensemble& iUnCorrected) const {
   float threshold;
   if(Global::isValid(mFixed))
      threshold = mFixed;
   else
      threshold = iParameters[0];

   for(int i = 0; i < iUnCorrected.size(); i++) {
      float fcst = iUnCorrected[i];
      // Round values down
      if(Global::isValid(mRoundDownTo)) {
         if(Global::isValid(fcst) && fcst < threshold) {
            iUnCorrected[i] = mRoundDownTo;
         }
      }
      // Round values up
      else {
         if(Global::isValid(fcst) && fcst > threshold) {
            iUnCorrected[i] = mRoundUpTo;
         }
      }
   }
}
Exemple #7
0
void EnsembleGenerator::get_weights_average_and_std(const Ensemble& ensemble,
                    const Vector<saxs::WeightedFitParameters>& fps,
                    Vector<double>& weights_average,
                    Vector<double>& weights_variance) const {

  Vector<unsigned int> states_counters(N_, 0);
  Vector<Vector<double> > states_weights(N_);

  // count the number of occurences of each state in MultiStateModels
  // (states_counters) and store the weights (states_weights)
  for(unsigned int i=0; i<ensemble.size(); i++) {
    const Vector<unsigned int>& states = ensemble[i].get_states();
    const Vector<double>& weights = fps[i].get_weights();
    for(unsigned int k=0; k<states.size(); k++) {
      states_counters[states[k]]++;
      states_weights[states[k]].push_back(weights[k]);
    }
  }

  // compute weights average and variance for each state
  weights_average.insert(weights_average.begin(), N_, 0.0);
  weights_variance.insert(weights_variance.begin(), N_, 0.0);

  for(unsigned int i=0; i < N_; i++) {
    if(states_counters[i] > 0) {
      if(states_counters[i] == 1) {
        weights_average[i] = states_weights[i][0];
        weights_variance[i] = 1.0;
      } else {
        std::pair<double, double> av_std = get_average_and_stdev(states_weights[i]);
        weights_average[i] = av_std.first;
        weights_variance[i] = av_std.second;
      }
    }
  }
}
Exemple #8
0
// Applies derivative() to each member of iEnsemble in index order and
// stores the result back in place. The per-member overload that is called
// here is declared outside this excerpt.
void Transform::derivative(Ensemble& iEnsemble) const {
   int member = 0;
   while(member < iEnsemble.size()) {
      iEnsemble[member] = derivative(iEnsemble[member]);
      ++member;
   }
}
Exemple #9
0
// Applies inverse() to each member of iEnsemble in index order and stores
// the result back in place. The per-member overload that is called here is
// declared outside this excerpt.
void Transform::inverse(Ensemble& iEnsemble) const {
   int member = 0;
   while(member < iEnsemble.size()) {
      iEnsemble[member] = inverse(iEnsemble[member]);
      ++member;
   }
}
Exemple #10
0
// Applies transform() to each member of iEnsemble in index order and stores
// the result back in place. The per-member overload that is called here is
// declared outside this excerpt.
void Transform::transform(Ensemble& iEnsemble) const {
   int member = 0;
   while(member < iEnsemble.size()) {
      iEnsemble[member] = transform(iEnsemble[member]);
      ++member;
   }
}
Exemple #11
0
// Writes a text report for `ensemble` and fit files for its first models.
//
// ensemble: the models to report; each one gets its z-score stored through
//           set_zscore() as a side effect. An empty ensemble makes the
//           function return without doing anything.
// fps:      fit parameters indexed as fps[scorer][model], i.e. fps[j][i]
//           belongs to scorers_[j] and ensemble[i].
//
// Output produced:
//   - "ensembles_size_<n>.txt", where <n> is ensemble[0].size(): for each
//     model one summary line (1-based index, score, then chi, c1 and c2 per
//     scorer) followed by one line per state (state index, per-scorer weight
//     with its average and spread over the ensemble, state name, state
//     probability)
//   - one summary line on std::cout
//   - for the first 10 models, one fit file per scorer named
//     "multi_state_model_<n>_<model>_<scorer>.dat", written by
//     scorers_[j]->write_fit_file()
//
// NOTE(review): the z-score divides by average_and_std.second without
// checking it for zero, the output stream is never tested for a failed open,
// and scorers_[0] is used for state names without checking that scorers_ is
// non-empty.
void EnsembleGenerator::output(Ensemble& ensemble,
       const Vector<Vector<saxs::WeightedFitParameters> >& fps) const {

  if(ensemble.size() == 0) return;

  // calculate z-score: (score - average) / stdev over all model scores
  Vector<double> scores(ensemble.size());
  for(unsigned int i=0; i<ensemble.size(); i++) scores[i] = ensemble[i].get_score();
  std::pair<double, double> average_and_std = get_average_and_stdev(scores);
  for(unsigned int i=0; i<ensemble.size(); i++) {
    double zscore = (ensemble[i].get_score()-average_and_std.first) /
      average_and_std.second;
    ensemble[i].set_zscore(zscore);
  }

  // calculate frequency of each state
  Vector<double> state_prob;
  get_state_probabilities(ensemble, state_prob);

  // calculate weights average and variance, separately for each scorer
  Vector<Vector<double> > weights_average(scorers_.size()),
    weights_variance(scorers_.size());
  for(unsigned int i=0; i<scorers_.size(); i++) {
    get_weights_average_and_std(ensemble, fps[i], weights_average[i],
                                weights_variance[i]);
  }

  // output file; the size of the first model is used to name it
  unsigned int number_of_states = ensemble[0].size();
  std::string out_file_name = "ensembles_size_" +
    std::string(boost::lexical_cast<std::string>(number_of_states)) + ".txt";
  std::ofstream s(out_file_name.c_str());
  std::cout << "multi_state_model_size " << ensemble.size ()
            << " number_of_states " << number_of_states << std::endl;

  for(unsigned int i=0; i<ensemble.size(); i++) {
    // output ensemble scores (fixed notation, 1-based model index)
    s.setf(std::ios::fixed, std::ios::floatfield);
    s << i+1 << " | " << std::setw(5) << std::setprecision(2)
      << ensemble[i].get_score(); // << " | " << ensemble[i].get_zscore();

    // output scores for each scorer: chi followed by (c1, c2)
    for(unsigned int j=0; j<scorers_.size(); j++) {
      const saxs::WeightedFitParameters& p = fps[j][i];
      s << " | x" << std::string(boost::lexical_cast<std::string>(j+1))
        //scorers_[j]->get_dataset_name() << ": "
        << " " << std::setprecision(2) << p.get_chi()
        << " (" << p.get_c1() << ", " << p.get_c2() << ")";
    }
    s << std::endl;

    // output states and their probabilities, one line per state
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      s << std::setw(5) << states[k];

      // output weights: this model's weight for the state, then the
      // ensemble-wide (average, spread) of that state's weight
      for(unsigned int j=0; j<scorers_.size(); j++) {
        const saxs::WeightedFitParameters& p = fps[j][i];
        // skip scorers that hold fewer weights than the model has states
        if(p.get_weights().size() > k) {
          s << std::setw(5) << std::setprecision(3) << " | "
            << p.get_weights()[k] << " ("
            << weights_average[j][states[k]] << ", "
            << weights_variance[j][states[k]] << ")";
        }
      }
      // state names are always taken from the first scorer
      s << " | "  << scorers_[0]->get_state_name(states[k])
        << " (" << state_prob[states[k]] << ")" << std::endl;
    }

    // output fit file, only for the first 10 models
    if(i<10) { // TODO: add parameter
      for(unsigned int j=0; j<scorers_.size(); j++) {
        std::string fit_file_name = "multi_state_model_" +
          std::string(boost::lexical_cast<std::string>(number_of_states)) + "_" +
          std::string(boost::lexical_cast<std::string>(i+1));
        // NOTE(review): always true inside this loop (j < scorers_.size()),
        // so the scorer suffix is appended even with a single scorer
        if(scorers_.size() > 0) {
          fit_file_name +=  "_" + std::string(boost::lexical_cast<std::string>(j+1));
        }
        fit_file_name += ".dat";
        scorers_[j]->write_fit_file(ensemble[i], fps[j][i], fit_file_name);
      }
    }
  }
  s.close();
}
Exemple #12
0
// Re-scores the models of `ensemble` with every scorer and returns the
// accepted ones sorted by ascending total score.
//
// ensemble:          models to re-score, visited in index order. Each
//                    visited model gets its score overwritten with the sum
//                    of the scorers' scores, or with -1 when one of its
//                    states has a maximal weight (over all scorers) below
//                    min_weight_threshold_.
// rescored_ensemble: overwritten with the accepted models (score >= 0).
// rescored_fps:      overwritten with the matching fit parameters, indexed
//                    as rescored_fps[scorer][model].
//
// The scan stops as soon as K_ models have been accepted, so models after
// that point are neither re-scored nor considered.
void EnsembleGenerator::rescore(Ensemble& ensemble,
                                Ensemble& rescored_ensemble,
                     Vector<Vector<saxs::WeightedFitParameters> >& rescored_fps)
  const {

  typedef std::multimap<double, unsigned int> ScoreIndex;

  // upper bound on the number of accepted models, used to pre-size the result
  const unsigned int expected_num =
    std::min(static_cast<unsigned int>(ensemble.size()), K_);

  // all_fps[k][i]: fit parameters of scorer k for every visited model i
  Vector<Vector<saxs::WeightedFitParameters> > all_fps(scorers_.size());
  // accepted models: total score -> index into ensemble
  ScoreIndex accepted_by_score;
  unsigned int accepted = 0;

  // re-score
  for(unsigned int m = 0; m < ensemble.size(); ++m) {
    if(m>0 && m%100==0) {
      std::cerr << "Rescoring ensemble " << m << " out of " << ensemble.size() << std::endl;
    }

    // largest weight each state received from any scorer
    Vector<double> max_weights(ensemble[m].size(), 0.0);
    double total_score = 0;
    for(unsigned int k = 0; k < scorers_.size(); ++k) {
      saxs::WeightedFitParameters p = scorers_[k]->get_fit_parameters(ensemble[m]);
      total_score += p.get_score();

      const Vector<double>& weights = p.get_weights();
      for(unsigned int wi = 0; wi < weights.size(); ++wi) {
        if(weights[wi] > max_weights[wi]) max_weights[wi] = weights[wi];
      }
      all_fps[k].push_back(p);
    }
    ensemble[m].set_score(total_score);

    // a state whose best weight is below the threshold invalidates the model
    for(unsigned int wi = 0; wi < max_weights.size(); ++wi) {
      if(max_weights[wi] < min_weight_threshold_) {
        ensemble[m].set_score(-1);
        break;
      }
    }

    // models with a negative score are left out of the result
    if(ensemble[m].get_score() < 0.0) continue;
    accepted_by_score.insert(std::make_pair(ensemble[m].get_score(), m));

    if(++accepted >= K_) break;
  }

  // sort: walking the multimap yields the accepted models by ascending score
  Ensemble sorted_ensemble;
  sorted_ensemble.reserve(expected_num);
  Vector<Vector<saxs::WeightedFitParameters> > sorted_fps(scorers_.size());

  const ScoreIndex::const_iterator last = accepted_by_score.end();
  for(ScoreIndex::const_iterator it = accepted_by_score.begin();
      it != last; ++it) {
    const unsigned int source = it->second;
    sorted_ensemble.push_back(ensemble[source]);
    for(unsigned int k = 0; k < scorers_.size(); ++k) {
      sorted_fps[k].push_back(all_fps[k][source]);
    }
  }

  rescored_ensemble = sorted_ensemble;
  rescored_fps = sorted_fps;
}