Example #1
0
// Computes, for each of the N_ candidate states, how often it appears in the
// MultiStateModels of the given ensemble.
//
// ensemble   - models whose state lists (get_states()) are counted
// state_prob - output; overwritten so that it holds exactly N_ entries:
//                0.0                                    state never occurs
//                1 / (total number of state occurrences) state occurs once
//                occurrences / (number of models)        otherwise
void EnsembleGenerator::get_state_probabilities(const Ensemble& ensemble,
                                         Vector<double>& state_prob) const {

  Vector<unsigned int> states_counters(N_, 0);
  unsigned int total_state_num = 0;

  // count the number of occurrences of each state in the MultiStateModels
  // of the entire Ensemble (states_counters), and the grand total
  for(unsigned int i=0; i<ensemble.size(); i++) {
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      states_counters[states[k]]++;
      total_state_num++;
    }
  }

  // assign() instead of insert(begin(), ...): the output always ends up with
  // exactly N_ entries, even if the caller hands in a non-empty vector
  state_prob.assign(N_, 0.0);

  for(unsigned int i=0; i<N_; i++) {
    if(states_counters[i] == 0) continue; // state never used: stays 0.0

    if(states_counters[i] == 1) {
      // NOTE(review): a single occurrence is normalised by the total number
      // of state occurrences, all other counts by the number of models -
      // confirm that this asymmetry is intended
      state_prob[i] = 1.0/total_state_num;
    } else {
      state_prob[i] = states_counters[i]/(double)ensemble.size();
    }
  }
}
Example #2
0
// Runs the complete synchronisation check and returns the resulting status.
// Whenever one of the stages produces a message, that message is written to
// std::cout and the status of that stage is returned immediately.
int
SyncFiles::run(void)
{
  returnValue = 0;

  // The qa_target, if available, is appended to the ensemble.
  if( qa_target.size() )
    ensemble->addTarget(qa_target);

  // Receives the text a stage wants to have printed before exiting.
  std::string msg;

  // Stage 1: read dates from the nc-files, sort them in ascending order
  // (old --> young) and get the modification times of the files.
  // Any annotation would be done there.
  returnValue = ensemble->getTimes(msg);
  if( ! msg.empty() )
  {
    // exit condition found
    std::cout << msg;
    return returnValue;
  }

  // Stage 2: did any 'no-record' occur? Error cases are trapped.
  if( ensemble->isNoRec )
  {
    returnValue = printTimelessFile(msg);
    if( ! msg.empty() )
    {
      std::cout << msg;
      return returnValue;
    }
  }

  // Stage 3: check for ambiguities; 0 means PASS.
  // Safe for a single file, because this was processed before.
  returnValue = ensemble->testAmbiguity(msg, isPrintOnlyMarked,
                                        isModificationTest, isMixingRefused);
  if( returnValue && ! msg.empty() )
  {
    std::cout << msg;
    return returnValue;
  }

  // Stage 4: apply user supplied time-limits and/or synchronisation to a
  // target file. Dates are already sorted. This does not detect any error;
  // it just adjusts the index range.
  returnValue = ensemble->constraint(timeLimitStr);

  // successful run
  print();

  return returnValue;
}
Example #3
0
// Prints the ensemble and, only when isPeriod is set, also its date span.
void
SyncFiles::print(void)
{
  ensemble->print();

  if( ! isPeriod )
    return;

  ensemble->printDateSpan();
}
Example #4
0
File: Qc.cpp Project: WFRT/Comps
// Quality-controls every member of the ensemble in place: a member that fails
// check() is overwritten with Global::MV.
// Returns true if at least one member was replaced.
bool Qc::qc(Ensemble& iEnsemble) const {
   bool modified = false;
   for(int member = 0; member < iEnsemble.size(); member++) {
      // Wrap the member value together with the ensemble's metadata
      const bool passed = check(Value(iEnsemble[member],
                                      iEnsemble.getDate(),
                                      iEnsemble.getInit(),
                                      iEnsemble.getOffset(),
                                      iEnsemble.getLocation(),
                                      iEnsemble.getVariable()));
      if(passed)
         continue;

      modified = true;
      iEnsemble[member] = Global::MV;
   }
   return modified;
}
Example #5
0
void EnsembleGenerator::add_one_state(const Ensemble& init_ensemble,
                                      Ensemble& new_ensemble) {

  std::priority_queue<boost::tuple<double, int, int>,
                      Vector<boost::tuple<double, int, int> >,
                      Comparator> bestK;

  // iterate over all init MultiStateModels and try to add a new state to each
  for(unsigned int i=0; i<init_ensemble.size(); i++) {
    unsigned int first_to_search = init_ensemble[i].get_last_state()+1;
    if(first_to_search<N_) {

      if(i>0 && i%100==0 && !bestK.empty()) {
        double curr_bestK_score = boost::get<0>(bestK.top());
        std::cout << "Extending ensemble: " << i << " out of "
                  << init_ensemble.size() << " last best "
                  << curr_bestK_score << std::endl;
      }

      MultiStateModel new_model(init_ensemble[i]);
      new_model.add_state(first_to_search);

      // try all possible additions of a new state
      for(unsigned int j=first_to_search; j<N_; j++) {
        new_model.replace_last_state(j);
        double curr_score = get_score(new_model);
        if(curr_score < 0.0) continue; // invalid model
        // add to bestK
        if(bestK.size() <= K_ || curr_score < boost::get<0>(bestK.top())) {
          bestK.push(boost::make_tuple(curr_score, i, j));
          if(bestK.size() > K_) bestK.pop();
        }
      }
    }
  }

  // save best scoring
  new_ensemble.assign(bestK.size(), MultiStateModel(0));
  int index = bestK.size()-1;
  // generate bestK new MultiStateModels
  while(!bestK.empty()) {
    double score;
    int ensemble_index, new_state_index;
    boost::tie(score, ensemble_index, new_state_index) = bestK.top();
    MultiStateModel new_model(init_ensemble[ensemble_index]);
    new_model.add_state(new_state_index);
    new_model.set_score(score);
    new_ensemble[index] = new_model;
    index--;
    bestK.pop();
  }
}
Example #6
0
// Returns the median of the ensemble members that are not equal to
// Global::MV. For an even number of such members the two middle values are
// averaged. Returns Global::MV if no member remains.
float MeasureEnsembleMedian::measureCore(const Ensemble& iEnsemble) const {
   // Remove missing values
   std::vector<float> valid;
   for(int i = 0; i < iEnsemble.size(); i++) {
      if(iEnsemble[i] != Global::MV) {
         valid.push_back(iEnsemble[i]);
      }
   }
   if(valid.empty()) {
      return Global::MV;
   }

   std::sort(valid.begin(), valid.end());

   // Integer division already rounds down, so no floor() (which would index
   // the vector with a double) is needed
   const std::vector<float>::size_type N   = valid.size();
   const std::vector<float>::size_type mid = N/2;

   // Even size: mean of the two central values
   if(N % 2 == 0) {
      return (valid[mid - 1] + valid[mid])/2;
   }
   // Odd size: the central value
   return valid[mid];
}
Example #7
0
float Continuous::getMomentCore(int iMoment, const Ensemble& iEnsemble, const Parameters& iParameters) const {
   // TODO: Not tested
   const Variable* var = Variable::get(iEnsemble.getVariable());

   float total = 0;

   float minX  = var->getMin();
   float maxX  = var->getMax();
   int   nX    = 1000;
   float dX    = (maxX - minX)/((float) nX);

   float c = 0;
   if(iMoment > 1) {
      // Compute center of moment
      c = getMoment(1, iEnsemble, iParameters);
   }

   for(int i = 0; i < nX; i++) {
      float x = minX + i*dX;
      float pdf = getPdf(x, iEnsemble, iParameters);
      if(!Global::isValid(pdf))
         return Global::MV;

      total += pow(x - c, iMoment) * pdf;
   }
   return total * dX;
}
Example #8
0
// Replaces the values of the ensemble by a single value: the result of
// applying mMeasure to the uncorrected ensemble. iParameters is not used.
void CorrectorMeasure::correctCore(const Parameters& iParameters,
                                   Ensemble& iUnCorrected) const {
    const float summary = mMeasure->measure(iUnCorrected);

    // One-element vector holding the measured value
    std::vector<float> collapsed(1, summary);
    iUnCorrected.setValues(collapsed);
}
Example #9
0
// Numerically inverts the CDF: searches for a value X for which
// getCdf(X, ...) lies within mInvTol of iCdf.
//
// The search starts at a fixed guess and walks towards the target with a
// step that is halved each time the walking direction reverses.
//
// Returns
//   - X once |cdf(X) - iCdf| <= mInvTol,
//   - the variable's min/max when the search is clamped to a discrete bound
//     and the CDF there is still on the far side of the target,
//   - the last X (after printing a message) when more than 1000 iterations
//     were needed,
//   - Global::MV when X or any evaluated CDF is invalid.
float Continuous::getInvCore(float iCdf, const Ensemble& iEnsemble, const Parameters& iParameters) const {
   // Initial step size of the search
   float step    = 10;
   // TODO
   // Hard-coded starting point of the search
   float X       = 5;
   float currCdf = getCdf(X, iEnsemble, iParameters);
   if(!Global::isValid(currCdf))
      return Global::MV;

   // Direction of the previous step: -1 down, +1 up, 0 before the first step
   int dir       = 0;
   // Number of completed iterations
   int counter   = 0;
   const Variable* var = Variable::get(iEnsemble.getVariable());
   bool  lowerDiscrete = var->isLowerDiscrete();
   bool  upperDiscrete = var->isUpperDiscrete();
   float varMin  = var->getMin();
   float varMax  = var->getMax();

   while(fabs(currCdf - iCdf) > mInvTol) {
      if(currCdf > iCdf) {
         // Overshot while going up: refine the step before turning around
         if(dir == 1) {
            step /= 2;
         }
         X = X - step;
         dir = -1;
      }
      else {
         // Undershot while going down: refine the step before turning around
         if(dir == -1) {
            step /= 2;
         }
         X = X + step;
         dir = 1;
      }
      // Check that we are not stepping outside the variable's domain
      // (only enforced on a side that is flagged as discrete)
      if(lowerDiscrete && X < varMin)
         X = varMin;
      if(upperDiscrete && X > varMax)
         X = varMax;

      if(!Global::isValid(X))
         return Global::MV;
      // Give up after too many iterations; the current X is returned as is
      if(counter > 1000) {
         std::cout << "Continuous.cpp: Could not converge on CDF target: " << iCdf << " " << X << " " << currCdf << std::endl;
         return X;
      }
      currCdf = getCdf(X, iEnsemble, iParameters);
      if(!Global::isValid(currCdf)) {
         return Global::MV;
      }

      // Sitting on a discrete bound with the CDF still beyond the target:
      // the search cannot move any further, so the bound is the answer
      if(lowerDiscrete && X == varMin && currCdf > iCdf)
         return X;
      if(upperDiscrete && X == varMax && currCdf < iCdf)
         return X;
      counter++;
   }

   return X;
}
Example #10
0
// Stores the given path, appends a trailing '/' to a non-empty path that
// lacks one, and passes the result on to the ensemble.
void
SyncFiles::setPath(std::string &p)
{
  path = p;

  const bool needsSlash = path.size() && path[ path.size()-1 ] != '/';
  if( needsSlash )
    path += '/';

  ensemble->setPath(path);
}
Example #11
0
// Handles the case that no time record could be determined.
//
// str - output; receives the ensemble output (restricted to marked entries)
//       in every case except return code 4, where it is left untouched.
//
// Return codes
//   10  more than one file: the temporal sequence of the files could not be
//       determined
//    4  exactly one file without any state entry: a fixed field file
//    3  exactly one file with a state entry (a fixed field file, but with
//       error), or no file at all (unspecific error)
int
SyncFiles::printTimelessFile(std::string &str)
{
   // A single file without state information is a plain fixed field file;
   // nothing is reported for it.
   if( ensemble->sz == 1 && ! ensemble->member[0]->state.size() )
     return 4;

   // Fix the return code before the ensemble is asked for its output.
   const int code = ( ensemble->sz > 1 ) ? 10 : 3;

   // All remaining cases report the marked entries only.
   ensemble->enablePrintOnlyMarked();
   str = ensemble->getOutput() ;

   return code;
}
Example #12
0
float MeasureLocalGradient::measureCore(const Ensemble& iEnsemble) const {
   int   date   = iEnsemble.getDate();
   int init     = iEnsemble.getInit();
   float offset = iEnsemble.getOffset();
   Location loc = iEnsemble.getLocation();

   // Determine which variable to use
   std::string var = mVariable;
   if(mVariable == "") {
      var = iEnsemble.getVariable();
   }

   std::string dataset = loc.getDataset();
   float centerValue = iEnsemble.getMoment(1);

   // Get nearby locations
   Input* input = mData.getInput(dataset);
   std::vector<Location> nearbyLocations;
   input->getSurroundingLocations(loc, nearbyLocations, 4);

   // Compute gradient
   float grad;
   int counter = 0;
   for(int i = 0; i < (int) nearbyLocations.size(); i++) {
      Ensemble ens;
      mData.getEnsemble(date, init, offset, nearbyLocations[i], dataset, var, ens);
      float mean = ens.getMoment(1);
      if(Global::isValid(mean)) {
         // TODO
         float dist = 0;
         float dvdx = 0;
         float dydy = 0;
         counter++;
      }
   }

   return iEnsemble.getMoment(2);
}
Example #13
0
// Small demonstration of the Ensemble set class: insert 1..50, show the
// cardinality and the content, remove 30..39 and show both again.
int main() {
	Ensemble e;

	// Insert the values 1 to 50
	int valeur = 1;
	while (valeur <= 50) {
		e.inserer(valeur);
		valeur++;
	}

	cout << e.card() << endl;

	// Display once without and once with an argument
	e.affiche();
	e.affiche(25);

	// Remove the values 30 to 39
	valeur = 30;
	while (valeur <= 39) {
		e.retirer(valeur);
		valeur++;
	}

	e.affiche();
	cout << e.card() << endl;

	return EXIT_SUCCESS;
}
Example #14
0
File: Round.cpp Project: WFRT/Comps
void CorrectorRound::correctCore(const Parameters& iParameters, Ensemble& iUnCorrected) const {
   float threshold;
   if(Global::isValid(mFixed))
      threshold = mFixed;
   else
      threshold = iParameters[0];

   for(int i = 0; i < iUnCorrected.size(); i++) {
      float fcst = iUnCorrected[i];
      // Round values down
      if(Global::isValid(mRoundDownTo)) {
         if(Global::isValid(fcst) && fcst < threshold) {
            iUnCorrected[i] = mRoundDownTo;
         }
      }
      // Round values up
      else {
         if(Global::isValid(fcst) && fcst > threshold) {
            iUnCorrected[i] = mRoundUpTo;
         }
      }
   }
}
Example #15
0
// Collects, for each of the N_ candidate states, the weights it received in
// the models of the ensemble and summarises them.
//
// ensemble         - models whose state lists (get_states()) are scanned
// fps              - fit parameters, fps[i] belonging to ensemble[i]; the
//                    k-th weight of fps[i] is attributed to the k-th state
//                    of ensemble[i]
// weights_average  - output; overwritten with exactly N_ entries: the
//                    average weight of each state (0.0 if never used)
// weights_variance - output; overwritten with exactly N_ entries: the second
//                    value returned by get_average_and_stdev for each state,
//                    1.0 for a state used exactly once, 0.0 if never used
void EnsembleGenerator::get_weights_average_and_std(const Ensemble& ensemble,
                    const Vector<saxs::WeightedFitParameters>& fps,
                    Vector<double>& weights_average,
                    Vector<double>& weights_variance) const {

  // weights observed for each state; the size of each list doubles as the
  // occurrence count of that state
  Vector<Vector<double> > states_weights(N_);

  for(unsigned int i=0; i<ensemble.size(); i++) {
    const Vector<unsigned int>& states = ensemble[i].get_states();
    const Vector<double>& weights = fps[i].get_weights();
    for(unsigned int k=0; k<states.size(); k++) {
      states_weights[states[k]].push_back(weights[k]);
    }
  }

  // assign() instead of insert(begin(), ...): the outputs always end up with
  // exactly N_ entries, even if the caller hands in non-empty vectors
  weights_average.assign(N_, 0.0);
  weights_variance.assign(N_, 0.0);

  for(unsigned int i=0; i < N_; i++) {
    if(states_weights[i].empty()) continue; // state never used: stays 0.0

    if(states_weights[i].size() == 1) {
      // a single sample: its weight is the average, spread is set to 1.0
      weights_average[i] = states_weights[i][0];
      weights_variance[i] = 1.0;
    } else {
      std::pair<double, double> av_std = get_average_and_stdev(states_weights[i]);
      weights_average[i] = av_std.first;
      weights_variance[i] = av_std.second;
    }
  }
}
Example #16
0
File: Clim.cpp Project: WFRT/Comps
// Blends every valid ensemble member with a climatological value, in place:
//    member * (1 - w) + clim * w,   where w is parameter 0.
// With mComputeClim set, clim is parameter 1 (exactly two parameters are
// asserted); otherwise it is obtained from mData.getClim for the ensemble's
// date, init, offset, location and variable.
// Nothing is changed when clim or either weight is invalid.
void CorrectorClim::correctCore(const Parameters& iParameters, Ensemble& iEnsemble) const {
   const float climWeight = iParameters[0];
   const float ensWeight  = 1 - climWeight;

   float clim;
   if(!mComputeClim) {
      clim = mData.getClim(iEnsemble.getDate(), iEnsemble.getInit(), iEnsemble.getOffset(),
            iEnsemble.getLocation(), iEnsemble.getVariable());
   }
   else {
      assert(iParameters.size() == 2);
      clim = iParameters[1];
   }

   const bool usable = Global::isValid(clim) && Global::isValid(ensWeight) && Global::isValid(climWeight);
   if(!usable)
      return;

   for(int n = 0; n < iEnsemble.size(); n++) {
      const float member = iEnsemble[n];
      // Missing members stay missing
      if(!Global::isValid(member))
         continue;
      iEnsemble[n] = member * ensWeight + clim * climWeight;
   }
}
Example #17
0
// Merges the set of this pixel with the set of the adjacent pixel, unless one
// of the two pixels is black or both already belong to the same set.
void Pixel::propageCouleur(Pixel* pixel_Adjacent){

    // Black pixels never take part in a merge
    if (this->pixelNoir || pixel_Adjacent->pixelNoir) {
        return;
    }

    // Nothing to do when both pixels already share a set
    if (dans_Meme_Ensemble(pixel_Adjacent)) {
        return;
    }

    Ensemble* mine   = this->getEnsemble();
    Ensemble* theirs = pixel_Adjacent->getEnsemble();

    // The strictly larger set receives the other one at its tail; on equal
    // sizes the adjacent pixel's set is the receiver
    if (mine->getSize() > theirs->getSize()) {
        mine->addEnsemble_inTail(theirs);
    } else {
        theirs->addEnsemble_inTail(mine);
    }
}
Example #18
0
// Seconds of processor time elapsed since the given clock() reading.
static double secondsSince(clock_t start) {
	return (double)(clock()-start) / ((double)CLOCKS_PER_SEC);
}

// Command line driver for the tree ensemble.
//
// With 11 arguments a new forest is trained and saved:
//   numTrees treeDepth numDimTrials numThreshTrials bagProb(percent)
//   minNoOFExsAtNode ipData ipLabels testData opFileName ensembleFile
// With 3 arguments an existing forest is loaded:
//   testData opFileName ensembleFile
// In both cases the forest is then applied to the test data and the result
// is written to opFileName. Any other argument count prints the usage text.
// Always returns 0.
int main(int argc, char* argv[]) {

	// ---- file names ----
	string ipData;
	string ipLabels;
	string ensembleFile;
	string testData;
	string opFileName;

	// ---- training parameters (only set and used when training) ----
	int numTrees = 0;
	int treeDepth = 0;         // 0 = 1 node, 1 = 3 nodes, 2 = 7 ...
	int numDimTrials = 0;      // num of dims to try < dim
	int numThreshTrials = 0;   // num of thresholds at each dimension to try
	float bagProb = 0;         // probability of sample landing in bag
	int minNoOFExsAtNode = 0;  // stop growing tree if num of examples at node equals this value

	srand ( time(NULL) );
	clock_t started;
	Ensemble trees;

	// ---- interpret the command line ----
	const bool trainingNew = (argc == 12);
	if (trainingNew) {
		// training new forest
		numTrees = atoi(argv[1]);
		treeDepth = atoi(argv[2]);
		numDimTrials = atoi(argv[3]);
		numThreshTrials = atoi(argv[4]);
		bagProb = (float)atoi(argv[5])/100.0;
		minNoOFExsAtNode = atoi(argv[6]);

		cout << "** " << bagProb << " " << treeDepth << endl;
		ipData = argv[7];
		ipLabels = argv[8];
		testData = argv[9];
		opFileName =  argv[10];
		ensembleFile =  argv[11];
	}
	else if (argc == 4) {
		// loading existing forest
		testData = argv[1];
		opFileName =  argv[2];
		ensembleFile =  argv[3];
	}
	else {
		// exit if an invalid number of arguments was passed
		displayUsage();
		return 0;
	}

	if (trainingNew) {

		// ---- load training data ----
		started = clock();
		cout << endl << "***************************" << endl << "Loading data" << endl << endl;
		Matrix2df data;
		loadData(ipData, data);
		Matrix2df labels;
		loadData(ipLabels, labels);
		cout << "loading data time " << secondsSince(started) << " sec" << endl;

		// ---- train ----
		started = clock();
		cout << endl << "***************************" << endl << "Training" << endl;
		trees.setParams(numTrees, treeDepth, data.cols, labels.cols);
		trees.train(data, labels, numDimTrials, numThreshTrials, bagProb, minNoOFExsAtNode);
		// overwrite the current console line with blanks
		cout << "\r                                      " << endl;
		cout << "training time " << secondsSince(started) << " sec" << endl;

		// ---- save forest ----
		trees.writeEnsemble(ensembleFile);
	}
	else {

		// ---- load forest ----
		trees.loadEnsemble(ensembleFile);
	}

	// ---- test ----
	cout << endl << "***************************" << endl << "Testing" << endl << endl;
	cout << "reading test data";
	Matrix2df test;
	loadData(testData, test);
	Matrix2df op;
	started = clock();
	trees.test(test, op);
	cout << "\rtesting time " << secondsSince(started) << " sec" << endl;

	// ---- save results ----
	writeMatrix(opFileName, op, false);

	return 0;
}
Example #19
0
// Re-scores the models of the ensemble with all scorers_ and returns the
// accepted ones ordered by ascending score.
//
// ensemble          - in/out; the score of every processed model is set to
//                     the sum of the scores of all scorers_, or to -1 when
//                     the model is rejected by the weight threshold
// rescored_ensemble - output; the accepted models, ascending by score
// rescored_fps      - output; rescored_fps[k][n] holds the fit parameters of
//                     scorer k for rescored_ensemble[n]
//
// Processing stops as soon as K_ models have been accepted.
void EnsembleGenerator::rescore(Ensemble& ensemble,
                                Ensemble& rescored_ensemble,
                     Vector<Vector<saxs::WeightedFitParameters> >& rescored_fps)
  const {

  // upper bound on the number of accepted models; used for reserve() only
  unsigned int print_num = std::min((unsigned int)ensemble.size(), K_);

  // fps[k][i]: fit parameters of scorer k for ensemble[i] (one entry per
  // processed model, accepted or not, so the indices match ensemble indices)
  Vector<Vector<saxs::WeightedFitParameters> > fps(scorers_.size()),
    sorted_fps(scorers_.size());
  // score -> index into ensemble, for the accepted models only
  std::multimap<double, unsigned int> scores;
  // number of accepted models so far
  unsigned int counter = 0;

  // re-score
  for(unsigned int i = 0; i < ensemble.size(); i++) {
    // progress report every 100 models
    if(i>0 && i%100==0) {
      std::cerr << "Rescoring ensemble " << i << " out of " << ensemble.size() << std::endl;
    }
    // iterate scorers and record max weight for each state
    Vector<double> max_weights(ensemble[i].size(), 0.0);
    double score = 0;
    for(unsigned int k = 0; k < scorers_.size(); k++) {
      saxs::WeightedFitParameters p = scorers_[k]->get_fit_parameters(ensemble[i]);
      score += p.get_score();
      // find the max weight contribution of each state
      for(unsigned int wi = 0; wi < p.get_weights().size(); wi++) {
        if(p.get_weights()[wi] > max_weights[wi])
          max_weights[wi] = p.get_weights()[wi];
      }
      // stored before the threshold check below, see the note on fps
      fps[k].push_back(p);
    }
    ensemble[i].set_score(score);

    // check max weights for threshold: a state whose largest weight over all
    // scorers is below min_weight_threshold_ invalidates the whole model
    for(unsigned int wi=0; wi<max_weights.size(); wi++) {
      if(max_weights[wi] < min_weight_threshold_) {
        ensemble[i].set_score(-1);
        break;
      }
    }

    // do not output MultiStateModels with one of the weights below threshold
    if(ensemble[i].get_score() < 0.0) continue;
    scores.insert(std::make_pair(ensemble[i].get_score(), i));
    counter++;

    // enough accepted models; the remaining ones are not scored at all
    if(counter >= K_) break;
  }

  // sort: the multimap iterates in ascending order of score
  Ensemble sorted_ensemble;
  sorted_ensemble.reserve(print_num);
  std::multimap<double, unsigned int>::iterator it, end_it = scores.end();
  for(it = scores.begin(); it != end_it; it++) {
    //std::cerr << "score = " << it->first << std::endl;
    sorted_ensemble.push_back(ensemble[it->second]);
    for(unsigned int k=0; k<scorers_.size(); k++) {
      sorted_fps[k].push_back(fps[k][it->second]);
    }
  }
  rescored_ensemble = sorted_ensemble;
  rescored_fps = sorted_fps;
}
Example #20
0
// Writes a text summary of the ensemble to "ensembles_size_<n>.txt", where
// <n> is the size of the first model, and writes fit files for the first ten
// models. Also stores a z-score in every model.
//
// ensemble - in/out; models to report; their z-score is set here
// fps      - fps[j][i] holds the fit parameters of scorer j for ensemble[i]
//
// Does nothing for an empty ensemble.
void EnsembleGenerator::output(Ensemble& ensemble,
       const Vector<Vector<saxs::WeightedFitParameters> >& fps) const {

  if(ensemble.size() == 0) return;

  // calculate z-score of every model relative to the scores of the ensemble
  // NOTE(review): the second value of get_average_and_stdev is used as the
  // divisor without a zero check - confirm it cannot be 0 (e.g. one model)
  Vector<double> scores(ensemble.size());
  for(unsigned int i=0; i<ensemble.size(); i++) scores[i] = ensemble[i].get_score();
  std::pair<double, double> average_and_std = get_average_and_stdev(scores);
  for(unsigned int i=0; i<ensemble.size(); i++) {
    double zscore = (ensemble[i].get_score()-average_and_std.first) /
      average_and_std.second;
    ensemble[i].set_zscore(zscore);
  }

  // calculate frequency of each state
  Vector<double> state_prob;
  get_state_probabilities(ensemble, state_prob);

  // calculate weights average and variance, separately for each scorer
  Vector<Vector<double> > weights_average(scorers_.size()),
    weights_variance(scorers_.size());
  for(unsigned int i=0; i<scorers_.size(); i++) {
    get_weights_average_and_std(ensemble, fps[i], weights_average[i],
                                weights_variance[i]);
  }

  // output file; its name is derived from the size of the first model
  unsigned int number_of_states = ensemble[0].size();
  std::string out_file_name = "ensembles_size_" +
    std::string(boost::lexical_cast<std::string>(number_of_states)) + ".txt";
  std::ofstream s(out_file_name.c_str());
  std::cout << "multi_state_model_size " << ensemble.size ()
            << " number_of_states " << number_of_states << std::endl;

  for(unsigned int i=0; i<ensemble.size(); i++) {
    // output ensemble scores: 1-based model number and total score
    s.setf(std::ios::fixed, std::ios::floatfield);
    s << i+1 << " | " << std::setw(5) << std::setprecision(2)
      << ensemble[i].get_score(); // << " | " << ensemble[i].get_zscore();

    // output scores for each scorer: "x<j>" followed by chi and (c1, c2)
    for(unsigned int j=0; j<scorers_.size(); j++) {
      const saxs::WeightedFitParameters& p = fps[j][i];
      s << " | x" << std::string(boost::lexical_cast<std::string>(j+1))
        //scorers_[j]->get_dataset_name() << ": "
        << " " << std::setprecision(2) << p.get_chi()
        << " (" << p.get_c1() << ", " << p.get_c2() << ")";
    }
    s << std::endl;

    // output states and their probabilities, one line per state
    const Vector<unsigned int>& states = ensemble[i].get_states();
    for(unsigned int k=0; k<states.size(); k++) {
      s << std::setw(5) << states[k];

      // output weights: per scorer the weight of this state in this model,
      // followed by (average, variance) of that state over the ensemble
      for(unsigned int j=0; j<scorers_.size(); j++) {
        const saxs::WeightedFitParameters& p = fps[j][i];
        // skip scorers that supply fewer weights than the model has states
        if(p.get_weights().size() > k) {
          s << std::setw(5) << std::setprecision(3) << " | "
            << p.get_weights()[k] << " ("
            << weights_average[j][states[k]] << ", "
            << weights_variance[j][states[k]] << ")";
        }
      }
      // the state name is always taken from the first scorer
      // NOTE(review): scorers_[0] is accessed unconditionally - confirm that
      // scorers_ is never empty here
      s << " | "  << scorers_[0]->get_state_name(states[k])
        << " (" << state_prob[states[k]] << ")" << std::endl;
    }

    // output fit file, for the first ten models only
    if(i<10) { // TODO: add parameter
      for(unsigned int j=0; j<scorers_.size(); j++) {
        // multi_state_model_<number of states>_<model number>[_<scorer>].dat
        std::string fit_file_name = "multi_state_model_" +
          std::string(boost::lexical_cast<std::string>(number_of_states)) + "_" +
          std::string(boost::lexical_cast<std::string>(i+1));
        // NOTE(review): always true inside this loop, so the scorer suffix
        // is always appended; possibly "> 1" was intended - confirm
        if(scorers_.size() > 0) {
          fit_file_name +=  "_" + std::string(boost::lexical_cast<std::string>(j+1));
        }
        fit_file_name += ".dat";
        scorers_[j]->write_fit_file(ensemble[i], fps[j][i], fit_file_name);
      }
    }
  }
  s.close();
}
Example #21
0
// Applies the single-value transform() to every member of the ensemble,
// overwriting each member with its result.
void Transform::transform(Ensemble& iEnsemble) const {
   const int numMembers = iEnsemble.size();
   for(int member = 0; member < numMembers; ++member) {
      iEnsemble[member] = transform(iEnsemble[member]);
   }
}
Example #22
0
// Applies the single-value derivative() to every member of the ensemble,
// overwriting each member with its result.
void Transform::derivative(Ensemble& iEnsemble) const {
   const int numMembers = iEnsemble.size();
   for(int member = 0; member < numMembers; ++member) {
      iEnsemble[member] = derivative(iEnsemble[member]);
   }
}
Example #23
0
// Applies the single-value inverse() to every member of the ensemble,
// overwriting each member with its result.
void Transform::inverse(Ensemble& iEnsemble) const {
   const int numMembers = iEnsemble.size();
   for(int member = 0; member < numMembers; ++member) {
      iEnsemble[member] = inverse(iEnsemble[member]);
   }
}