void EnsembleGenerator::get_state_probabilities(const Ensemble& ensemble, Vector<double>& state_prob) const { Vector<unsigned int> states_counters(N_, 0); unsigned int total_state_num = 0; // count the number of occurences of each state in MultiStateModels // in the entire Ensemble (states_counters) for(unsigned int i=0; i<ensemble.size(); i++) { const Vector<unsigned int>& states = ensemble[i].get_states(); for(unsigned int k=0; k<states.size(); k++) { states_counters[states[k]]++; total_state_num++; } } // compute state probs and weight variance state_prob.insert(state_prob.begin(), N_, 0.0); // compute the probability of each state to appear in the MultiStateModels // (state_prob), it's average weight across models (weight_average) // and variance (weight_variance) for(unsigned int i=0; i<N_; i++) { if(states_counters[i] > 0) { if(states_counters[i] == 1) { state_prob[i] = 1.0/total_state_num; } else { state_prob[i] = states_counters[i]/(double)ensemble.size(); } } } }
int SyncFiles::run(void) { returnValue=0; // we append the qa_target, if available if( qa_target.size() ) ensemble->addTarget(qa_target); // read dates from nc-files, // sort dates in ascending order (old --> young) // and get modification times of files. // Any annotation would be done there. std::string str; returnValue = ensemble->getTimes(str) ; if( str.size() ) // exit condition found { std::cout << str ; return returnValue ; } // Did any 'no-record' occur? Error cases are trapped. if( ensemble->isNoRec ) { returnValue = printTimelessFile(str) ; if( str.size() ) { std::cout << str ; return returnValue ; } } // Check for ambiguities, return 0 for PASS. // Safe for a single file, because this was processed before if( (returnValue = ensemble->testAmbiguity (str, isPrintOnlyMarked, isModificationTest, isMixingRefused )) ) { if( str.size() ) { std::cout << str ; return returnValue; } } // Apply user supplied time-limits and/or // synchronisation to a target file. // Note that dates are already sorted. // Note: does not detect any error; just adjusts index range returnValue = ensemble->constraint(timeLimitStr) ; // successful run print(); return returnValue ; }
// Prints the ensemble; when isPeriod is set, the date span is printed as well.
void SyncFiles::print(void)
{
  ensemble->print();

  if( !isPeriod )
    return;

  ensemble->printDateSpan();
}
// Runs check() on every member of the ensemble and replaces each member that
// fails with Global::MV.
// Returns true when at least one member was replaced.
bool Qc::qc(Ensemble& iEnsemble) const {
   bool modified = false;
   for(int m = 0; m < iEnsemble.size(); m++) {
      // Each member is checked together with the ensemble's metadata
      if(check(Value(iEnsemble[m],
                     iEnsemble.getDate(),
                     iEnsemble.getInit(),
                     iEnsemble.getOffset(),
                     iEnsemble.getLocation(),
                     iEnsemble.getVariable()))) {
         continue;
      }
      iEnsemble[m] = Global::MV;
      modified = true;
   }
   return modified;
}
void EnsembleGenerator::add_one_state(const Ensemble& init_ensemble, Ensemble& new_ensemble) { std::priority_queue<boost::tuple<double, int, int>, Vector<boost::tuple<double, int, int> >, Comparator> bestK; // iterate over all init MultiStateModels and try to add a new state to each for(unsigned int i=0; i<init_ensemble.size(); i++) { unsigned int first_to_search = init_ensemble[i].get_last_state()+1; if(first_to_search<N_) { if(i>0 && i%100==0 && !bestK.empty()) { double curr_bestK_score = boost::get<0>(bestK.top()); std::cout << "Extending ensemble: " << i << " out of " << init_ensemble.size() << " last best " << curr_bestK_score << std::endl; } MultiStateModel new_model(init_ensemble[i]); new_model.add_state(first_to_search); // try all possible additions of a new state for(unsigned int j=first_to_search; j<N_; j++) { new_model.replace_last_state(j); double curr_score = get_score(new_model); if(curr_score < 0.0) continue; // invalid model // add to bestK if(bestK.size() <= K_ || curr_score < boost::get<0>(bestK.top())) { bestK.push(boost::make_tuple(curr_score, i, j)); if(bestK.size() > K_) bestK.pop(); } } } } // save best scoring new_ensemble.assign(bestK.size(), MultiStateModel(0)); int index = bestK.size()-1; // generate bestK new MultiStateModels while(!bestK.empty()) { double score; int ensemble_index, new_state_index; boost::tie(score, ensemble_index, new_state_index) = bestK.top(); MultiStateModel new_model(init_ensemble[ensemble_index]); new_model.add_state(new_state_index); new_model.set_score(score); new_ensemble[index] = new_model; index--; bestK.pop(); } }
float MeasureEnsembleMedian::measureCore(const Ensemble& iEnsemble) const { float median; // Remove missing values std::vector<float> temp; for(int i = 0; i < iEnsemble.size(); i++) { if(iEnsemble[i] != Global::MV) { temp.push_back(iEnsemble[i]); } } if(temp.size() == 0) { median = Global::MV; } else { std::sort(temp.begin(), temp.end()); unsigned int N = temp.size(); // Even size if(N % 2 == 0) { median = (temp[N/2 - 1] + temp[N/2])/2; } // Odd size else { median = temp[floor(N/2)]; } } return median; }
float Continuous::getMomentCore(int iMoment, const Ensemble& iEnsemble, const Parameters& iParameters) const { // TODO: Not tested const Variable* var = Variable::get(iEnsemble.getVariable()); float total = 0; float minX = var->getMin(); float maxX = var->getMax(); int nX = 1000; float dX = (maxX - minX)/((float) nX); float c = 0; if(iMoment > 1) { // Compute center of moment c = getMoment(1, iEnsemble, iParameters); } for(int i = 0; i < nX; i++) { float x = minX + i*dX; float pdf = getPdf(x, iEnsemble, iParameters); if(!Global::isValid(pdf)) return Global::MV; total += pow(x - c, iMoment) * pdf; } return total * dX; }
void CorrectorMeasure::correctCore(const Parameters& iParameters, Ensemble& iUnCorrected) const { float value = mMeasure->measure(iUnCorrected); std::vector<float> values; values.push_back(value); iUnCorrected.setValues(values); }
float Continuous::getInvCore(float iCdf, const Ensemble& iEnsemble, const Parameters& iParameters) const { float step = 10; // TODO float X = 5; float currCdf = getCdf(X, iEnsemble, iParameters); if(!Global::isValid(currCdf)) return Global::MV; int dir = 0; int counter = 0; const Variable* var = Variable::get(iEnsemble.getVariable()); bool lowerDiscrete = var->isLowerDiscrete(); bool upperDiscrete = var->isUpperDiscrete(); float varMin = var->getMin(); float varMax = var->getMax(); while(fabs(currCdf - iCdf) > mInvTol) { if(currCdf > iCdf) { if(dir == 1) { step /= 2; } X = X - step; dir = -1; } else { if(dir == -1) { step /= 2; } X = X + step; dir = 1; } // Check that we are not stepping outside the variable's domain if(lowerDiscrete && X < varMin) X = varMin; if(upperDiscrete && X > varMax) X = varMax; if(!Global::isValid(X)) return Global::MV; if(counter > 1000) { std::cout << "Continuous.cpp: Could not converge on CDF target: " << iCdf << " " << X << " " << currCdf << std::endl; return X; } currCdf = getCdf(X, iEnsemble, iParameters); if(!Global::isValid(currCdf)) { return Global::MV; } if(lowerDiscrete && X == varMin && currCdf > iCdf) return X; if(upperDiscrete && X == varMax && currCdf < iCdf) return X; counter++; } return X; }
// Stores p as the path, appends a trailing '/' when the path is non-empty
// and does not already end with one, and forwards the result to the ensemble.
void SyncFiles::setPath(std::string &p)
{
  path = p;

  const bool hasText = path.size();
  if( hasText && *path.rbegin() != '/' )
    path += '/';

  ensemble->setPath(path);
}
int SyncFiles::printTimelessFile(std::string &str) { // true below means: print only marked entries if( ensemble->sz > 1 ) { // occurence within an ensemble of files: error std::string key("17_1"); std::string capt("Determination of temporal sequence of files failed."); // More than a single file is an error; output filenames ensemble->enablePrintOnlyMarked(); str = ensemble->getOutput() ; return 10; } else if( ensemble->sz == 1 ) { if( ensemble->member[0]->state.size() ) { ensemble->enablePrintOnlyMarked(); str = ensemble->getOutput() ; return 3; // a fixed field file, but with error } else return 4; // a fixed field file } else { ensemble->enablePrintOnlyMarked(); str = ensemble->getOutput() ; return 3 ; // unspecific error } }
float MeasureLocalGradient::measureCore(const Ensemble& iEnsemble) const { int date = iEnsemble.getDate(); int init = iEnsemble.getInit(); float offset = iEnsemble.getOffset(); Location loc = iEnsemble.getLocation(); // Determine which variable to use std::string var = mVariable; if(mVariable == "") { var = iEnsemble.getVariable(); } std::string dataset = loc.getDataset(); float centerValue = iEnsemble.getMoment(1); // Get nearby locations Input* input = mData.getInput(dataset); std::vector<Location> nearbyLocations; input->getSurroundingLocations(loc, nearbyLocations, 4); // Compute gradient float grad; int counter = 0; for(int i = 0; i < (int) nearbyLocations.size(); i++) { Ensemble ens; mData.getEnsemble(date, init, offset, nearbyLocations[i], dataset, var, ens); float mean = ens.getMoment(1); if(Global::isValid(mean)) { // TODO float dist = 0; float dvdx = 0; float dydy = 0; counter++; } } return iEnsemble.getMoment(2); }
int main() { Ensemble e; for (int i = 1; i <= 50; i++) { e.inserer(i); } cout << e.card() << endl; e.affiche(); e.affiche(25); for (int i = 30; i <= 39; i++) { e.retirer(i); } e.affiche(); cout << e.card() << endl; return EXIT_SUCCESS; }
void CorrectorRound::correctCore(const Parameters& iParameters, Ensemble& iUnCorrected) const { float threshold; if(Global::isValid(mFixed)) threshold = mFixed; else threshold = iParameters[0]; for(int i = 0; i < iUnCorrected.size(); i++) { float fcst = iUnCorrected[i]; // Round values down if(Global::isValid(mRoundDownTo)) { if(Global::isValid(fcst) && fcst < threshold) { iUnCorrected[i] = mRoundDownTo; } } // Round values up else { if(Global::isValid(fcst) && fcst > threshold) { iUnCorrected[i] = mRoundUpTo; } } } }
void EnsembleGenerator::get_weights_average_and_std(const Ensemble& ensemble, const Vector<saxs::WeightedFitParameters>& fps, Vector<double>& weights_average, Vector<double>& weights_variance) const { Vector<unsigned int> states_counters(N_, 0); Vector<Vector<double> > states_weights(N_); // count the number of occurences of each state in MultiStateModels // (states_counters) and store the weights (states_weights) for(unsigned int i=0; i<ensemble.size(); i++) { const Vector<unsigned int>& states = ensemble[i].get_states(); const Vector<double>& weights = fps[i].get_weights(); for(unsigned int k=0; k<states.size(); k++) { states_counters[states[k]]++; states_weights[states[k]].push_back(weights[k]); } } // compute weights average and variance for each state weights_average.insert(weights_average.begin(), N_, 0.0); weights_variance.insert(weights_variance.begin(), N_, 0.0); for(unsigned int i=0; i < N_; i++) { if(states_counters[i] > 0) { if(states_counters[i] == 1) { weights_average[i] = states_weights[i][0]; weights_variance[i] = 1.0; } else { std::pair<double, double> av_std = get_average_and_stdev(states_weights[i]); weights_average[i] = av_std.first; weights_variance[i] = av_std.second; } } } }
void CorrectorClim::correctCore(const Parameters& iParameters, Ensemble& iEnsemble) const { float climWeight = iParameters[0]; float ensWeight = 1 - climWeight; float clim; if(mComputeClim) { assert(iParameters.size() == 2); clim = iParameters[1]; } else clim = mData.getClim(iEnsemble.getDate(), iEnsemble.getInit(), iEnsemble.getOffset(), iEnsemble.getLocation(), iEnsemble.getVariable()); if(Global::isValid(clim) && Global::isValid(ensWeight) && Global::isValid(climWeight)) { for(int n = 0; n < iEnsemble.size(); n++) { float currValue = iEnsemble[n]; if(Global::isValid(currValue)) { iEnsemble[n] = currValue * ensWeight + clim * climWeight; } } } }
// Merges the set of this pixel with the set of the adjacent pixel.
//
// Nothing happens when either pixel is black or when both pixels already
// belong to the same set. Otherwise the set with the strictly larger size
// receives the other one at its tail; on equal sizes the adjacent pixel's
// set receives this pixel's set.
void Pixel::propageCouleur(Pixel* pixel_Adjacent){
    // Black pixels never take part in a merge
    if (this->pixelNoir || pixel_Adjacent->pixelNoir) {
        return;
    }
    if (dans_Meme_Ensemble(pixel_Adjacent)) {
        return;
    }

    Ensemble* mine = this->getEnsemble();
    Ensemble* theirs = pixel_Adjacent->getEnsemble();

    const bool mineIsLarger = mine->getSize() > theirs->getSize();
    Ensemble* receiver = mineIsLarger ? mine : theirs;
    Ensemble* appended = mineIsLarger ? theirs : mine;
    receiver->addEnsemble_inTail(appended);
}
int main(int argc, char* argv[]) { /********************* ** Parameters *********************/ string ipData; string ipLabels; string ensembleFile; string testData; string opFileName; // training parameters int numTrees; int treeDepth; // 0 = 1 node, 1 = 3 nodes, 2 = 7 ... int numDimTrials;//num of dims to try < dim int numThreshTrials; // num of thresholds at each dimension to try float bagProb; // probability of sample landing in bag int minNoOFExsAtNode; // stop growing tree if num of examples at node equals this value srand ( time(NULL) ); clock_t init; Ensemble trees; /********************* ** Set up parameters *********************/ bool trainingNew = false; if (argc == 12) {// training new forest trainingNew = true; numTrees = atoi(argv[1]); treeDepth = atoi(argv[2]); numDimTrials = atoi(argv[3]); numThreshTrials = atoi(argv[4]); bagProb = (float)atoi(argv[5])/100.0; minNoOFExsAtNode = atoi(argv[6]); cout << "** " << bagProb << " " << treeDepth << endl; ipData = argv[7]; ipLabels = argv[8]; testData = argv[9]; opFileName = argv[10]; ensembleFile = argv[11]; } else if (argc == 4) {// loading existing forest testData = argv[1]; opFileName = argv[2]; ensembleFile = argv[3]; } else {// exit if invalid number of agruments passed displayUsage(); return 0; } /********************* ** Training *********************/ if (trainingNew) { /********************* ** Load training data *********************/ init=clock(); cout << endl << "***************************" << endl << "Loading data" << endl << endl; Matrix2df data; loadData(ipData, data); Matrix2df labels; loadData(ipLabels, labels); cout << "loading data time " << (double)(clock()-init) / ((double)CLOCKS_PER_SEC) << " sec" << endl; /********************* ** Training *********************/ init=clock(); cout << endl << "***************************" << endl << "Training" << endl; trees.setParams(numTrees, treeDepth, data.cols, labels.cols); trees.train(data, labels, numDimTrials, numThreshTrials, bagProb, 
minNoOFExsAtNode); cout << "\r " << endl; cout << "training time " << (double)(clock()-init) / ((double)CLOCKS_PER_SEC) << " sec" << endl; /********************* ** Save forest *********************/ trees.writeEnsemble(ensembleFile); } else { /********************* ** Load forest *********************/ trees.loadEnsemble(ensembleFile); } /********************* ** Testing *********************/ cout << endl << "***************************" << endl << "Testing" << endl << endl; cout << "reading test data"; Matrix2df test; loadData(testData, test); Matrix2df op; init=clock(); trees.test(test, op); cout << "\rtesting time " << (double)(clock()-init) / ((double)CLOCKS_PER_SEC) << " sec" << endl; /********************* ** Save results *********************/ writeMatrix(opFileName, op, false); return 0; }
void EnsembleGenerator::rescore(Ensemble& ensemble, Ensemble& rescored_ensemble, Vector<Vector<saxs::WeightedFitParameters> >& rescored_fps) const { unsigned int print_num = std::min((unsigned int)ensemble.size(), K_); Vector<Vector<saxs::WeightedFitParameters> > fps(scorers_.size()), sorted_fps(scorers_.size()); std::multimap<double, unsigned int> scores; unsigned int counter = 0; // re-score for(unsigned int i = 0; i < ensemble.size(); i++) { if(i>0 && i%100==0) { std::cerr << "Rescoring ensemble " << i << " out of " << ensemble.size() << std::endl; } // iterate scorers and record max weight for each state Vector<double> max_weights(ensemble[i].size(), 0.0); double score = 0; for(unsigned int k = 0; k < scorers_.size(); k++) { saxs::WeightedFitParameters p = scorers_[k]->get_fit_parameters(ensemble[i]); score += p.get_score(); // find the max weight contribution of each state for(unsigned int wi = 0; wi < p.get_weights().size(); wi++) { if(p.get_weights()[wi] > max_weights[wi]) max_weights[wi] = p.get_weights()[wi]; } fps[k].push_back(p); } ensemble[i].set_score(score); // check max weights for threshold for(unsigned int wi=0; wi<max_weights.size(); wi++) { if(max_weights[wi] < min_weight_threshold_) { ensemble[i].set_score(-1); break; } } // do not output MultiStateModels with one of the weights below threshold if(ensemble[i].get_score() < 0.0) continue; scores.insert(std::make_pair(ensemble[i].get_score(), i)); counter++; if(counter >= K_) break; } // sort Ensemble sorted_ensemble; sorted_ensemble.reserve(print_num); std::multimap<double, unsigned int>::iterator it, end_it = scores.end(); for(it = scores.begin(); it != end_it; it++) { //std::cerr << "score = " << it->first << std::endl; sorted_ensemble.push_back(ensemble[it->second]); for(unsigned int k=0; k<scorers_.size(); k++) { sorted_fps[k].push_back(fps[k][it->second]); } } rescored_ensemble = sorted_ensemble; rescored_fps = sorted_fps; }
// Writes a text report for the ensemble and fit files for its first models.
//
// Does nothing for an empty ensemble. Otherwise it:
//  1. stores a z-score on every model, computed from the two values returned
//     by get_average_and_stdev() over all model scores;
//  2. computes per-state probabilities and, per scorer, the per-state weight
//     average/variance;
//  3. writes "ensembles_size_<number_of_states>.txt" with one header line
//     per model (total score plus chi/c1/c2 per scorer) followed by one line
//     per state (weights per scorer, state name, state probability);
//  4. for the first 10 models, asks every scorer to write a fit file named
//     "multi_state_model_<number_of_states>_<model>_<scorer>.dat".
//
// fps[j][i] holds the fit parameters of scorer j for model i.
void EnsembleGenerator::output(Ensemble& ensemble,
                               const Vector<Vector<saxs::WeightedFitParameters> >& fps) const {
  if (ensemble.size() == 0) return;

  // calculate z-score
  Vector<double> scores(ensemble.size());
  for (unsigned int m = 0; m < ensemble.size(); m++) {
    scores[m] = ensemble[m].get_score();
  }
  std::pair<double, double> average_and_std = get_average_and_stdev(scores);
  for (unsigned int m = 0; m < ensemble.size(); m++) {
    double zscore = (ensemble[m].get_score() - average_and_std.first) /
                    average_and_std.second;
    ensemble[m].set_zscore(zscore);
  }

  // calculate frequency of each state
  Vector<double> state_prob;
  get_state_probabilities(ensemble, state_prob);

  // calculate weights average and variance, one pair of vectors per scorer
  Vector<Vector<double> > weights_average(scorers_.size()),
      weights_variance(scorers_.size());
  for (unsigned int sc = 0; sc < scorers_.size(); sc++) {
    get_weights_average_and_std(ensemble, fps[sc], weights_average[sc],
                                weights_variance[sc]);
  }

  // output file
  unsigned int number_of_states = ensemble[0].size();
  const std::string states_tag =
      boost::lexical_cast<std::string>(number_of_states);
  std::string out_file_name = "ensembles_size_" + states_tag + ".txt";
  std::ofstream s(out_file_name.c_str());

  std::cout << "multi_state_model_size " << ensemble.size ()
            << " number_of_states " << number_of_states << std::endl;

  for (unsigned int m = 0; m < ensemble.size(); m++) {
    // output ensemble scores
    s.setf(std::ios::fixed, std::ios::floatfield);
    s << m+1 << " | " << std::setw(5) << std::setprecision(2)
      << ensemble[m].get_score();
    // << " | " << ensemble[m].get_zscore();

    // output scores for each scorer
    for (unsigned int sc = 0; sc < scorers_.size(); sc++) {
      const saxs::WeightedFitParameters& p = fps[sc][m];
      s << " | x" << std::string(boost::lexical_cast<std::string>(sc+1))
        //scorers_[sc]->get_dataset_name()
        << ": " << " " << std::setprecision(2) << p.get_chi()
        << " (" << p.get_c1() << ", " << p.get_c2() << ")";
    }
    s << std::endl;

    // output states and their probabilities
    const Vector<unsigned int>& states = ensemble[m].get_states();
    for (unsigned int k = 0; k < states.size(); k++) {
      const unsigned int state = states[k];
      s << std::setw(5) << state;

      // output weights
      for (unsigned int sc = 0; sc < scorers_.size(); sc++) {
        const saxs::WeightedFitParameters& p = fps[sc][m];
        if (p.get_weights().size() > k) {
          s << std::setw(5) << std::setprecision(3) << " | "
            << p.get_weights()[k]
            << " (" << weights_average[sc][state] << ", "
            << weights_variance[sc][state] << ")";
        }
      }
      s << " | " << scorers_[0]->get_state_name(state)
        << " (" << state_prob[state] << ")" << std::endl;
    }

    // output fit file
    if (m >= 10) continue;  // TODO: add parameter
    const std::string model_tag = boost::lexical_cast<std::string>(m+1);
    for (unsigned int sc = 0; sc < scorers_.size(); sc++) {
      std::string fit_file_name =
          "multi_state_model_" + states_tag + "_" + model_tag;
      // NOTE(review): always true inside this loop, so the scorer suffix is
      // always appended - possibly meant to be "> 1"; confirm before changing
      if (scorers_.size() > 0) {
        fit_file_name += "_" +
            std::string(boost::lexical_cast<std::string>(sc+1));
      }
      fit_file_name += ".dat";
      scorers_[sc]->write_fit_file(ensemble[m], fps[sc][m], fit_file_name);
    }
  }
  s.close();
}
// Applies the scalar transform() to every member of the ensemble in place.
void Transform::transform(Ensemble& iEnsemble) const {
   for(int n = 0; n < iEnsemble.size(); ++n) {
      const float original = iEnsemble[n];
      iEnsemble[n] = transform(original);
   }
}
// Applies the scalar derivative() to every member of the ensemble in place.
void Transform::derivative(Ensemble& iEnsemble) const {
   for(int n = 0; n < iEnsemble.size(); ++n) {
      const float original = iEnsemble[n];
      iEnsemble[n] = derivative(original);
   }
}
// Applies the scalar inverse() to every member of the ensemble in place.
void Transform::inverse(Ensemble& iEnsemble) const {
   for(int n = 0; n < iEnsemble.size(); ++n) {
      const float original = iEnsemble[n];
      iEnsemble[n] = inverse(original);
   }
}