// Gathers the locally computed surrogate values over the inter0 communicator,
// stores them in `data`, and finally calls data.sync_values() so the values
// are shared with the remaining processes.
//
// local_n      - indices of the locally computed values; after the gather they
//                are used as the first argument of data.set_value().
// local_values - the locally computed values; gathered with the same receive
//                counts and displacements as local_n.
// data         - surrogate data object that receives the gathered values.
void InterpolationSurrogateBuilder<V,M>::sync_data( std::vector<unsigned int>& local_n,
                                                      std::vector<double>& local_values,
                                                      InterpolationSurrogateData<V,M>& data )
    // Only members of the inter0comm will do the communication of the local values
    unsigned int my_subrank = data.get_paramDomain().env().subRank();

    if( my_subrank == 0 )
        // Receive buffers sized for the full data set (data.n_values() entries).
        std::vector<double> all_values(data.n_values());

        std::vector<unsigned int> all_indices(data.n_values());

        // Filled by compute_strides() and passed to both Gatherv calls below.
        // NOTE(review): assuming MpiComm::Gatherv mirrors MPI_Gatherv's argument
        // order, m_njobs supplies the per-rank receive counts and strides the
        // displacements — confirm against the MpiComm interface.
        std::vector<int> strides;
        this->compute_strides( strides );

        const MpiComm& inter0comm = data.get_paramDomain().env().inter0Comm();

        /*! \todo Would be more efficient to pack local_n and local_values
            together and do Gatherv only once. */
        // NOTE(review): local_n, local_values, all_indices, m_njobs and strides
        // are all dereferenced via [0]; no emptiness check is visible here.
        inter0comm.Gatherv( &local_n[0], local_n.size(), MPI_UNSIGNED,
                            &all_indices[0], &m_njobs[0], &strides[0], MPI_UNSIGNED,
                            0 /*root*/,
                            "MpiComm::gatherv() failed!" );

        inter0comm.Gatherv( &local_values[0], local_values.size(), MPI_DOUBLE,
                            &all_values[0], &m_njobs[0], &strides[0], MPI_DOUBLE,
                            0 /*root*/,
                            "MpiComm::gatherv() failed!" );

        // Now set the values.
        /* PB: Although we are guaranteed per-rank ordering of the data we gathered,
           I'm not sure we can assume the same continuity of the inter0 ranks, i.e.
           I'm not sure how QUESO ordered the inter0 ranks. So, we go ahead and
           manually set the values. */
        // NOTE(review): this repeats the my_subrank == 0 test made above; it
        // looks redundant if this statement is already inside that guard — confirm.
        if( data.get_paramDomain().env().subRank() == 0 )
            // Pair each gathered index with its gathered value.
            for( unsigned int n = 0; n < data.n_values(); n++ )
              data.set_value( all_indices[n], all_values[n] );

    // Now broadcast the values data to all other processes
    data.sync_values( 0 /*root*/);
  // Constructor: builds the model, selects the dataset assigned to this group
  // of processors, collects the observation and sigma values of all datasets,
  // and finally creates the model evaluator.
  //
  // argc, argv  - forwarded unchanged to the FullModelEvaluator constructor.
  // queso_env   - QUESO environment used to build the model and the data
  //               vector space.
  // model_input - parsed input providing the "Likelihood/datasets" list.
  FullModelComposition<Vec,Mat>::FullModelComposition( int argc, char** argv,
                                                       const QUESO::BaseEnvironment& queso_env,
                                                       const GetPot& model_input )
    : _model(ModelBuilder<Vec,Mat>::build_model(queso_env,model_input)),
                    // NOTE(review): the next line looks like the tail of another
                    // member initializer whose beginning is not visible here —
                    // confirm against the full file.
                    model_input.vector_variable_size("Likelihood/datasets") )
    // Grab the datasets we'll be working with
    unsigned int n_datasets = model_input.vector_variable_size("Likelihood/datasets");

    // Each entry is later used as the argument of a GetPot constructor;
    // "DIE!" is the default returned when entry d is missing.
    std::vector<std::string> datasets(n_datasets);
    for( unsigned int d = 0; d < n_datasets; d++ )
        datasets[d] = model_input( "Likelihood/datasets", "DIE!", d );

    // This is the dataset the current set of processors is going to work on
    // NOTE(review): dataset_index is used to index `datasets` without a visible
    // range check — presumably the comm handler guarantees 0 <= index < n_datasets.
    int dataset_index = this->_comm_handler.get_dataset_index();

    // Input for this dataset
    _forward_run_input.reset( new GetPot(datasets[dataset_index]) );

    // Setup data space, 2 datapoints per dataset
    unsigned int n_datapoints = 2*n_datasets;
    QUESO::VectorSpace<Vec,Mat> data_space( queso_env, "data_", n_datapoints, NULL);

    _observations.reset( data_space.newVector() );
    _covariance.reset( data_space.newVector() );

    // Now parse data values and the corresponding covariances
    // Each processor parses its own dataset
    // Then we'll gather/broadcast to everyone
    std::vector<double> local_values(2);
    std::vector<double> all_values(n_datapoints);

    // Convention, mass_loss is first, then avg_N
    // A missing data_value entry falls back to 0.0.
    local_values[0] = (*_forward_run_input)("MassLossLikelihood/data_value", 0.0);
    local_values[1] = (*_forward_run_input)("AverageNLikelihood/data_value", 0.0);

    // Only processes with a non-negative inter0 rank take part in the gather
    // (2 doubles per participant, collected on rank 0 of that communicator).
    if( _comm_handler.get_inter0_rank() >= 0 )
      MPI_Gather( &local_values[0], 2, MPI_DOUBLE,
                  &all_values[0], 2, MPI_DOUBLE, 0,
                  _comm_handler.get_inter_chain_0_comm() );

    // Every process then receives the full set from rank 0 of the inter-chain
    // communicator.
    MPI_Bcast( &all_values[0], n_datapoints, MPI_DOUBLE,
               0, _comm_handler.get_inter_chain_comm() );

    for( unsigned int i = 0; i < n_datapoints; i++ )
      (*_observations)[i] = all_values[i];

    // Same gather/broadcast sequence for the sigma entries; a missing entry
    // falls back to -1.0.
    // NOTE(review): the values read from the "sigma" keys are stored in
    // _covariance unchanged; whether they are standard deviations or variances
    // is not visible here — confirm.
    local_values[0] = (*_forward_run_input)("MassLossLikelihood/sigma", -1.0);
    local_values[1] = (*_forward_run_input)("AverageNLikelihood/sigma", -1.0);

    if( _comm_handler.get_inter0_rank() >= 0 )
      MPI_Gather( &local_values[0], 2, MPI_DOUBLE,
                  &all_values[0], 2, MPI_DOUBLE, 0,
                  _comm_handler.get_inter_chain_0_comm() );

    MPI_Bcast( &all_values[0], n_datapoints, MPI_DOUBLE,
               0, _comm_handler.get_inter_chain_comm() );

    for( unsigned int i = 0; i < n_datapoints; i++ )
      (*_covariance)[i] = all_values[i];

    // Now setup model to be evaluated on this set of processors
    // We do this last because of the UFO check in GRINS
    _model_evaluator.reset( new FullModelEvaluator<Vec,Mat>(argc,argv,
                                                            *(_model.get())) );
// Estimates one value per row of `data` by binning each row into a histogram,
// running local_dynamic_programming() on every window of `windowSize`
// consecutive rows, and averaging the values the overlapping windows assign
// to each row.
//
// data            - input matrix; every entry of row r votes for a bin of row r.
// v_min, v_max    - value range covered by the bins (v_min < v_max is asserted).
// nBin            - number of bins, i.e. columns of the probability map.
// nJumpBin        - forwarded unchanged to local_dynamic_programming().
// windowSize      - number of consecutive rows in each local window.
// optimalSignal   - output: per-row mean of the values chosen by the windows.
// signal_variance - output: per-row spread of those values plus 0.0001.
// Returns true on every path visible here.
bool rgrsn_ldp::dynamic_programming(const vnl_matrix<double> & data,
                                    double v_min, double v_max,
                                    unsigned int nBin,
                                    int nJumpBin,
                                    unsigned int windowSize,
                                    vnl_vector<double> & optimalSignal,
                                    vnl_vector<double> & signal_variance)
    assert(v_min < v_max);
    // raw data to probability map
    // quantization
    const int N = data.rows();
    // NOTE(review): the loop below accumulates into probMap with +=, but no
    // explicit zero fill is visible here — confirm the initial contents of
    // vnl_matrix<double>(rows, cols).
    vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin);
    double interval = (v_max - v_min)/nBin;
    for (int r = 0; r<N; r++) {
        for (int c = 0; c<data.cols(); c++) {
            // Count one vote for the bin that data[r][c] falls into.
            int num = value_to_bin_number(v_min, interval, data[r][c], nBin);
            probMap[r][num] += 1.0;
    probMap /= data.cols(); // normalization
    vcl_vector<double> optimalValues(N, 0);
    vcl_vector<int> numValues(N, 0);      // multiple values from local dynamic programming
    vcl_vector<vcl_vector<double> > all_values(N);
    // Slide a window of windowSize rows over the probability map.
    // NOTE(review): N is int and windowSize is unsigned, so N - windowSize is
    // evaluated as unsigned and wraps around when windowSize > N — confirm
    // callers never pass a window larger than the number of rows.
    for (int i = 0; i<=N - windowSize; i++) {
        // get a local probMap;
        vnl_matrix<double> localProbMap = probMap.extract(windowSize, probMap.cols(), i, 0);
        vcl_vector<int> localOptimalBins;
        rgrsn_ldp::local_dynamic_programming(localProbMap, nJumpBin, localOptimalBins);
        assert(localOptimalBins.size() == windowSize);
        for (int j = 0; j < localOptimalBins.size(); j++) {
            // Window row j corresponds to global row i + j.
            double value = bin_number_to_value(v_min, interval, localOptimalBins[j]);
            assert(j + i < N);
            all_values[j + i].push_back(value);
            numValues[j + i]     += 1;
            optimalValues[j + i] += value;
    // mean value
    for (int i = 0; i<optimalValues.size(); i++) {
        optimalValues[i] /= numValues[i];
    // variance
    signal_variance = vnl_vector<double>(N, 0);
    for (int i = 0; i<optimalValues.size(); i++) {
        assert(all_values[i].size() > 0);
        // A single sample has no spread; use the 0.0001 floor directly.
        if (all_values[i].size() == 1) {
            signal_variance[i] = 0.0001;
            // Presumably the alternative branch (several samples for this row):
            // the mean output of mean_std is discarded, only sigma is used.
            // NOTE(review): the value named sigma is stored as the variance
            // without squaring — confirm what VnlPlus::mean_std returns.
            double dump_mean = 0.0;
            double sigma = 0.0;
            VnlPlus::mean_std(&all_values[i][0], (int)all_values[i].size(), dump_mean, sigma);
            signal_variance[i] = sigma + 0.0001; // avoid zero
    optimalSignal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size());
    // save variance with the size of window size, for test purpose
        // Writes all_value_vecs to "ldp_all_prediction.mat" under the name
        // "ldp_all_opt_path".
        vcl_vector<vnl_vector<double> > all_value_vecs;
        for (int i = 0; i<all_values.size(); i++) {
            // Selects rows that were covered by exactly windowSize windows.
            // NOTE(review): no statement is visible under this condition, so
            // all_value_vecs is never appended to in the visible code — confirm.
            if (all_values[i].size() == windowSize) {
        vcl_string save_file("ldp_all_prediction.mat");
        vnl_matlab_filewrite awriter(save_file.c_str());
        awriter.write(VnlPlus::vector_2_mat(all_value_vecs), "ldp_all_opt_path");
        printf("save to %s\n", save_file.c_str());
    return true;
// Estimates one value per row of `data` by binning each row into a histogram
// whose range is taken from the data itself, running viterbi() on every
// window of `window_size` consecutive rows, and averaging the values the
// overlapping windows assign to each row.
//
// data            - input matrix; every entry of row r votes for a bin of row r.
// resolution      - bin width; must be positive (asserted).
// transition      - forwarded to viterbi(); its size must be odd (asserted).
// window_size     - number of consecutive rows in each local window.
// optimal_signal  - output: per-row mean of the values chosen by the windows.
// signal_variance - NOTE(review): never assigned in the code visible here —
//                   confirm whether callers expect it to be filled.
// Returns true on every path visible here.
bool rgrsn_ldp::local_viterbi(const vnl_matrix<double> & data,
                              double resolution,
                              const vnl_vector<double> & transition,
                              unsigned int window_size,
                              vnl_vector<double> & optimal_signal,
                              vnl_vector<double> & signal_variance)
    assert(resolution > 0.0);
    assert(transition.size()%2 == 1);
    const double min_v = data.min_value();
    const double max_v = data.max_value();
    // The quotient is truncated towards zero when converted to int.
    const int nBin = (max_v - min_v)/resolution;
    // raw data to probability map
    // quantization
    const int N = data.rows();
    // NOTE(review): the loop below accumulates into probMap with +=, but no
    // explicit zero fill is visible here — confirm the initial contents of
    // vnl_matrix<double>(rows, cols).
    vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin);
    for (int r = 0; r<N; r++) {
        for (int c = 0; c<data.cols(); c++) {
            // Count one vote for the bin that data[r][c] falls into.
            int num = value_to_bin_number(min_v, resolution, data[r][c], nBin);
            probMap[r][num] += 1.0;
    probMap /= data.cols(); // normalization
    vcl_vector<double> optimalValues(N, 0);
    vcl_vector<int> numValues(N, 0);      // multiple values from local dynamic programming
    vcl_vector<vcl_vector<double> > all_values(N);   // for calculate variance
    // Slide a window of window_size rows over the probability map.
    // NOTE(review): N is int and window_size is unsigned, so N - window_size is
    // evaluated as unsigned and wraps around when window_size > N — confirm
    // callers never pass a window larger than the number of rows.
    for (int i = 0; i<=N - window_size; i++) {
        // get a local probMap;
        vnl_matrix<double> localProbMap = probMap.extract(window_size, probMap.cols(), i, 0);
        vcl_vector<int> localOptimalBins;
        rgrsn_ldp::viterbi(localProbMap, transition, localOptimalBins);
        assert(localOptimalBins.size() == window_size);
        for (int j = 0; j < localOptimalBins.size(); j++) {
            // Window row j corresponds to global row i + j.
            double value = bin_number_to_value(min_v, resolution, localOptimalBins[j]);
            numValues[j + i]     += 1;
            optimalValues[j + i] += value;
            all_values[j + i].push_back(value);
    // mean value of the per-row samples
    for (int i = 0; i<optimalValues.size(); i++) {
        optimalValues[i] /= numValues[i];
    optimal_signal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size());
        // Writes all_value_vecs to "lv_all_prediction.mat" under the name
        // "lv_all_opt_path".
        vcl_vector<vnl_vector<double> > all_value_vecs;
        for (int i = 0; i<all_values.size(); i++) {
            // Selects rows that were covered by exactly window_size windows.
            // NOTE(review): no statement is visible under this condition, so
            // all_value_vecs is never appended to in the visible code — confirm.
            if (all_values[i].size() == window_size) {
        vcl_string save_file("lv_all_prediction.mat");
        vnl_matlab_filewrite awriter(save_file.c_str());
        awriter.write(VnlPlus::vector_2_mat(all_value_vecs), "lv_all_opt_path");
        printf("save to %s\n", save_file.c_str());

    return true;
// Same windowed procedure as the mean-based estimate, but additionally
// reports, for every row, the median of the values that the overlapping
// windows assigned to it.
//
// data          - input matrix; every entry of row r votes for a bin of row r.
// v_min, v_max  - value range covered by the bins (v_min < v_max is asserted).
// nBin          - number of bins, i.e. columns of the probability map.
// nJumpBin      - forwarded unchanged to local_dynamic_programming().
// windowSize    - number of consecutive rows in each local window.
// optimalSignal - output: per-row mean of the values chosen by the windows.
// medianSignal  - output: per-row median of those values.
// Returns true on every path visible here.
bool rgrsn_ldp::dynamic_programming_median(const vnl_matrix<double> & data,
                                           double v_min, double v_max,
                                           unsigned int nBin,
                                           int nJumpBin,
                                           unsigned int windowSize,
                                           vnl_vector<double> & optimalSignal,
                                           vnl_vector<double> & medianSignal)
    assert(v_min < v_max);
    // raw data to probability map
    // quantization
    const int N = data.rows();
    // NOTE(review): the loop below accumulates into probMap with +=, but no
    // explicit zero fill is visible here — confirm the initial contents of
    // vnl_matrix<double>(rows, cols).
    vnl_matrix<double> probMap = vnl_matrix<double>(N, nBin);
    double interval = (v_max - v_min)/nBin;
    for (int r = 0; r<N; r++) {
        for (int c = 0; c<data.cols(); c++) {
            // Count one vote for the bin that data[r][c] falls into.
            int num = value_to_bin_number(v_min, interval, data[r][c], nBin);
            probMap[r][num] += 1.0;
    probMap /= data.cols(); // normalization
    vcl_vector<double> optimalValues(N, 0);
    vcl_vector<int> numValues(N, 0);         // multiple values from local dynamic programming
    vcl_vector<vcl_vector<double> > all_values(N);
    // Slide a window of windowSize rows over the probability map.
    // NOTE(review): N is int and windowSize is unsigned, so N - windowSize is
    // evaluated as unsigned and wraps around when windowSize > N — confirm
    // callers never pass a window larger than the number of rows.
    for (int i = 0; i<=N - windowSize; i++) {
        // get a local probMap;
        vnl_matrix<double> localProbMap = probMap.extract(windowSize, probMap.cols(), i, 0);
        vcl_vector<int> localOptimalBins;
        rgrsn_ldp::local_dynamic_programming(localProbMap, nJumpBin, localOptimalBins);
        assert(localOptimalBins.size() == windowSize);
        for (int j = 0; j < localOptimalBins.size(); j++) {
            // Window row j corresponds to global row i + j.
            double value = bin_number_to_value(v_min, interval, localOptimalBins[j]);
            assert(j + i < N);
            all_values[j + i].push_back(value);
            numValues[j + i]     += 1;
            optimalValues[j + i] += value;
    // mean value
    for (int i = 0; i<optimalValues.size(); i++) {
        optimalValues[i] /= numValues[i];
    // median
    medianSignal = vnl_vector<double>(N, 0);
    for (int i = 0; i<optimalValues.size(); i++) {
        assert(all_values[i].size() > 0);
        // A single sample is its own median.
        if (all_values[i].size() == 1) {
            medianSignal[i] = all_values[i][0];
            // Presumably the alternative branch (several samples for this row):
            // take the element at index size/2, which is the upper of the two
            // middle elements when the count is even. vcl_nth_element is
            // presumably VXL's alias of std::nth_element; it reorders
            // all_values[i] in place.
            size_t n = all_values[i].size() / 2;
            vcl_nth_element(all_values[i].begin(), all_values[i].begin() + n, all_values[i].end());
            medianSignal[i] = all_values[i][n];
    optimalSignal = vnl_vector<double>(&optimalValues[0], (int)optimalValues.size());
    return true;