void InterpolationSurrogateBuilder<V,M>::sync_data( std::vector<unsigned int>& local_n,
                                                    std::vector<double>& local_values,
                                                    InterpolationSurrogateData<V,M>& data )
{
  // Gather every inter0 rank's locally computed (index, value) pairs onto the
  // inter0 root, write them into `data`, then broadcast the complete value set
  // to all processes.
  //
  // Only members of the inter0comm will do the communication of the local
  // values; every member of inter0comm has subRank() == 0 within its own
  // subcommunicator, so this test selects exactly the inter0 participants.
  unsigned int my_subrank = data.get_paramDomain().env().subRank();

  if( my_subrank == 0 )
    {
      std::vector<double> all_values(data.n_values());
      std::vector<unsigned int> all_indices(data.n_values());

      // Per-rank displacements into the gathered arrays; m_njobs holds the
      // per-rank receive counts.
      std::vector<int> strides;
      this->compute_strides( strides );

      const MpiComm& inter0comm = data.get_paramDomain().env().inter0Comm();

      /*! \todo Would be more efficient to pack local_n and local_values
          together and do Gatherv only once. */
      inter0comm.Gatherv( &local_n[0], local_n.size(), MPI_UNSIGNED,
                          &all_indices[0], &m_njobs[0], &strides[0], MPI_UNSIGNED,
                          0 /*root*/,
                          "InterpolationSurrogateBuilder::sync_data()",
                          "MpiComm::gatherv() failed!" );

      inter0comm.Gatherv( &local_values[0], local_values.size(), MPI_DOUBLE,
                          &all_values[0], &m_njobs[0], &strides[0], MPI_DOUBLE,
                          0 /*root*/,
                          "InterpolationSurrogateBuilder::sync_data()",
                          "MpiComm::gatherv() failed!" );

      // Now set the values.
      /* PB: Although we are guaranteed per-rank ordering of the data we
         gathered, I'm not sure we can assume the same continuity of the
         inter0 ranks, i.e. I'm not sure how QUESO ordered the inter0 ranks.
         So, we go ahead and manually set the values. */
      // NOTE(review): the original code re-tested subRank() == 0 here, but we
      // are already inside the my_subrank == 0 branch, so the check was
      // redundant and has been removed.
      for( unsigned int n = 0; n < data.n_values(); n++ )
        data.set_value( all_indices[n], all_values[n] );
    }

  // Now broadcast the values data to all other processes
  data.sync_values( 0 /*root*/);
}
// Constructor: builds the underlying model, splits the processors across the
// configured likelihood datasets, parses this processor group's own dataset
// input file, and synchronizes the observation and covariance values to every
// process before constructing the model evaluator.
//
// Communication pattern (assumes the CommHandler conventions below — TODO
// confirm against the CommHandler implementation):
//   - get_inter0_rank() >= 0 marks the processes that participate in the
//     cross-dataset gather onto a single root,
//   - the subsequent Bcast pushes the fully assembled arrays to everyone.
FullModelComposition<Vec,Mat>::FullModelComposition( int argc, char** argv,
                                                     const QUESO::BaseEnvironment& queso_env,
                                                     const GetPot& model_input )
  : _model(ModelBuilder<Vec,Mat>::build_model(queso_env,model_input)),
    _comm_handler(queso_env.subComm().Comm(),
                  model_input.vector_variable_size("Likelihood/datasets") )
{
  // Grab the datasets we'll be working with
  unsigned int n_datasets = model_input.vector_variable_size("Likelihood/datasets");

  std::vector<std::string> datasets(n_datasets);

  // "DIE!" is GetPot's fallback value — it shows up downstream if an entry is
  // missing, making the misconfiguration visible.
  for( unsigned int d = 0; d < n_datasets; d++ )
    {
      datasets[d] = model_input( "Likelihood/datasets", "DIE!", d );
    }

  // This is the dataset the current set of processors is going to work on
  int dataset_index = this->_comm_handler.get_dataset_index();

  // Input for this dataset
  _forward_run_input.reset( new GetPot(datasets[dataset_index]) );

  // Setup data space, 2 datapoints per dataset
  unsigned int n_datapoints = 2*n_datasets;
  QUESO::VectorSpace<Vec,Mat> data_space( queso_env, "data_", n_datapoints, NULL);

  _observations.reset( data_space.newVector() );
  _covariance.reset( data_space.newVector() );

  // Now parse data values and the corresponding covariances
  // Each processor parses its own dataset
  // Then we'll gather/broadcast to everyone
  std::vector<double> local_values(2);
  std::vector<double> all_values(n_datapoints);

  // Convention, mass_loss is first, then avg_N
  local_values[0] = (*_forward_run_input)("MassLossLikelihood/data_value", 0.0);
  local_values[1] = (*_forward_run_input)("AverageNLikelihood/data_value", 0.0);

  // Gather each dataset's pair onto the root of the inter-chain-0
  // communicator; only ranks present in that communicator participate.
  if( _comm_handler.get_inter0_rank() >= 0 )
    MPI_Gather( &local_values[0], 2, MPI_DOUBLE,
                &all_values[0], 2, MPI_DOUBLE,
                0, _comm_handler.get_inter_chain_0_comm() );

  // Broadcast the assembled observations to every process.
  MPI_Bcast( &all_values[0], n_datapoints, MPI_DOUBLE,
             0, _comm_handler.get_inter_chain_comm() );

  for( unsigned int i = 0; i < n_datapoints; i++ )
    (*_observations)[i] = all_values[i];

  // Repeat the same gather/broadcast for the sigmas (covariance entries).
  // Fallback of -1.0 flags a missing sigma entry.
  local_values[0] = (*_forward_run_input)("MassLossLikelihood/sigma", -1.0);
  local_values[1] = (*_forward_run_input)("AverageNLikelihood/sigma", -1.0);

  if( _comm_handler.get_inter0_rank() >= 0 )
    MPI_Gather( &local_values[0], 2, MPI_DOUBLE,
                &all_values[0], 2, MPI_DOUBLE,
                0, _comm_handler.get_inter_chain_0_comm() );

  MPI_Bcast( &all_values[0], n_datapoints, MPI_DOUBLE,
             0, _comm_handler.get_inter_chain_comm() );

  for( unsigned int i = 0; i < n_datapoints; i++ )
    (*_covariance)[i] = all_values[i];

  // Now setup model to be evaluated on this set of processors
  // We do this last because of the UFO check in GRINS
  _model_evaluator.reset( new FullModelEvaluator<Vec,Mat>(argc,argv,
                                                          queso_env,
                                                          *(_forward_run_input.get()),
                                                          _comm_handler.get_split_chain_comm(),
                                                          *(_model.get())) );
}
bool rgrsn_ldp::dynamic_programming(const vnl_matrix<double> & data, double v_min, double v_max, unsigned int nBin, int nJumpBin, unsigned int windowSize, vnl_vector<double> & optimalSignal, vnl_vector<double> & signal_variance) { assert(v_min < v_max); // raw data to probability map // quantilization const int N = data.rows(); vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin); double interval = (v_max - v_min)/nBin; for (int r = 0; r<N; r++) { for (int c = 0; c<data.cols(); c++) { int num = value_to_bin_number(v_min, interval, data[r][c], nBin); probMap[r][num] += 1.0; } } probMap /= data.cols(); // normalization vcl_vector<double> optimalValues(N, 0); vcl_vector<int> numValues(N, 0); // multiple values from local dynamic programming vcl_vector<vcl_vector<double> > all_values(N); for (int i = 0; i<=N - windowSize; i++) { // get a local probMap; vnl_matrix<double> localProbMap = probMap.extract(windowSize, probMap.cols(), i, 0); vcl_vector<int> localOptimalBins; rgrsn_ldp::local_dynamic_programming(localProbMap, nJumpBin, localOptimalBins); assert(localOptimalBins.size() == windowSize); for (int j = 0; j < localOptimalBins.size(); j++) { double value = bin_number_to_value(v_min, interval, localOptimalBins[j]); assert(j + i < N); all_values[j + i].push_back(value); numValues[j + i] += 1; optimalValues[j + i] += value; } } // mean value for (int i = 0; i<optimalValues.size(); i++) { optimalValues[i] /= numValues[i]; } // variance signal_variance = vnl_vector<double>(N, 0); for (int i = 0; i<optimalValues.size(); i++) { assert(all_values[i].size() > 0); if (all_values[i].size() == 1) { signal_variance[i] = 0.0001; } else { double dump_mean = 0.0; double sigma = 0.0; VnlPlus::mean_std(&all_values[i][0], (int)all_values[i].size(), dump_mean, sigma); signal_variance[i] = sigma + 0.0001; // avoid zero } } optimalSignal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size()); // save variance with the size of window size, for test purpose if(0) { 
vcl_vector<vnl_vector<double> > all_value_vecs; for (int i = 0; i<all_values.size(); i++) { if (all_values[i].size() == windowSize) { all_value_vecs.push_back(VnlPlus::vector_2_vec(all_values[i])); } } vcl_string save_file("ldp_all_prediction.mat"); vnl_matlab_filewrite awriter(save_file.c_str()); awriter.write(VnlPlus::vector_2_mat(all_value_vecs), "ldp_all_opt_path"); printf("save to %s\n", save_file.c_str()); } return true; }
bool rgrsn_ldp::local_viterbi(const vnl_matrix<double> & data, double resolution, const vnl_vector<double> & transition, unsigned int window_size, vnl_vector<double> & optimal_signal, vnl_vector<double> & signal_variance) { assert(resolution > 0.0); assert(transition.size()%2 == 1); const double min_v = data.min_value(); const double max_v = data.max_value(); const int nBin = (max_v - min_v)/resolution; // raw data to probability map // quantilization const int N = data.rows(); vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin); for (int r = 0; r<N; r++) { for (int c = 0; c<data.cols(); c++) { int num = value_to_bin_number(min_v, resolution, data[r][c], nBin); probMap[r][num] += 1.0; } } probMap /= data.cols(); // normalization vcl_vector<double> optimalValues(N, 0); vcl_vector<int> numValues(N, 0); // multiple values from local dynamic programming vcl_vector<vcl_vector<double> > all_values(N); // for calculate variance for (int i = 0; i<=N - window_size; i++) { // get a local probMap; vnl_matrix<double> localProbMap = probMap.extract(window_size, probMap.cols(), i, 0); vcl_vector<int> localOptimalBins; rgrsn_ldp::viterbi(localProbMap, transition, localOptimalBins); assert(localOptimalBins.size() == window_size); for (int j = 0; j < localOptimalBins.size(); j++) { double value = bin_number_to_value(min_v, resolution, localOptimalBins[j]); numValues[j + i] += 1; optimalValues[j + i] += value; all_values[j + i].push_back(value); } } // for (int i = 0; i<optimalValues.size(); i++) { optimalValues[i] /= numValues[i]; } optimal_signal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size()); if(1) { vcl_vector<vnl_vector<double> > all_value_vecs; for (int i = 0; i<all_values.size(); i++) { if (all_values[i].size() == window_size) { all_value_vecs.push_back(VnlPlus::vector_2_vec(all_values[i])); } } vcl_string save_file("lv_all_prediction.mat"); vnl_matlab_filewrite awriter(save_file.c_str()); awriter.write(VnlPlus::vector_2_mat(all_value_vecs), 
"lv_all_opt_path"); printf("save to %s\n", save_file.c_str()); } return true; }
bool rgrsn_ldp::dynamic_programming_median(const vnl_matrix<double> & data, double v_min, double v_max, unsigned int nBin, int nJumpBin, unsigned int windowSize, vnl_vector<double> & optimalSignal, vnl_vector<double> & medianSignal) { assert(v_min < v_max); // raw data to probability map // quantilization const int N = data.rows(); vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin); double interval = (v_max - v_min)/nBin; for (int r = 0; r<N; r++) { for (int c = 0; c<data.cols(); c++) { int num = value_to_bin_number(v_min, interval, data[r][c], nBin); probMap[r][num] += 1.0; } } probMap /= data.cols(); // normalization vcl_vector<double> optimalValues(N, 0); vcl_vector<int> numValues(N, 0); // multiple values from local dynamic programming vcl_vector<vcl_vector<double> > all_values(N); for (int i = 0; i<=N - windowSize; i++) { // get a local probMap; vnl_matrix<double> localProbMap = probMap.extract(windowSize, probMap.cols(), i, 0); vcl_vector<int> localOptimalBins; rgrsn_ldp::local_dynamic_programming(localProbMap, nJumpBin, localOptimalBins); assert(localOptimalBins.size() == windowSize); for (int j = 0; j < localOptimalBins.size(); j++) { double value = bin_number_to_value(v_min, interval, localOptimalBins[j]); assert(j + i < N); all_values[j + i].push_back(value); numValues[j + i] += 1; optimalValues[j + i] += value; } } // mean value for (int i = 0; i<optimalValues.size(); i++) { optimalValues[i] /= numValues[i]; } // variance medianSignal = vnl_vector<double>(N, 0); for (int i = 0; i<optimalValues.size(); i++) { assert(all_values[i].size() > 0); if (all_values[i].size() == 1) { medianSignal[i] = all_values[i][0]; } else { size_t n = all_values[i].size() / 2; vcl_nth_element(all_values[i].begin(), all_values[i].begin() + n, all_values[i].end()); medianSignal[i] = all_values[i][n]; } } optimalSignal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size()); return true; }