Example #1
0
/**
 * Read whitespace-separated key/value pairs from `filename` and insert
 * them into the `parameters` map (first occurrence of a key wins, per
 * std::map::insert semantics).
 *
 * @param filename path of the parameter file to read.
 *
 * NOTE(review): if the file cannot be opened this silently reads nothing;
 * consider reporting an error to the caller — left as-is to preserve the
 * existing interface.
 */
void Parameter_Reader::readParameters(const std::string& filename)
{
    std::ifstream par_file(filename.c_str());

    std::string key;
    std::string value;

    /* Loop on extraction success instead of `!eof()`: eof() becomes true
     * only *after* a read fails, so the old loop inserted the final pair
     * twice (and ignored extraction failures altogether). */
    while (par_file >> key >> value)
    {
        parameters.insert(std::pair<std::string, std::string>(key, value));
    }
}
Example #2
0
int main (int argc, char** argv) {
  /* Initialize MPI */
  MPI_Init (&argc, &argv);

  /* Figure out the rank and size */
  MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank);
  MPI_Comm_size (MPI_COMM_WORLD, &mpi_size);

  /* MPI sends argc and argv everywhere --- parse everywhere */
  parse_parameters (argc,argv);

  /**
   * Now, we read the input matrix, FORCED, and PROHIBIT maps. To do this
   * we first create a partition of the total space so that we know which
   * range of KPIs is ours. Input matrix is stored per KPI and the maps 
   * are also ordered according to KPIs although not all the KPIs need to
   * be present.
   */
  pfunc::space_1D kpi_space = 
    partitioner_t<int>::create (0, 
                                int_params[NUM_KPIS_INDEX], 
                                mpi_rank, 
                                mpi_size);
  std::pair<int,int> full_kpi_range (0, int_params[NUM_KPIS_INDEX]);
  std::pair<int,int> my_kpi_range (kpi_space.begin(), kpi_space.end());
  std::vector<double> values 
         ((my_kpi_range.second-my_kpi_range.first)*
           int_params [NUM_INTERVALS_INDEX]);
  int_vec_map_t prohibit_map;
  int_vec_map_t forced_map;
  std::vector<double> kpi_weights (int_params[NUM_KPIS_INDEX], 1.0);
                                                         
  read_dense_matrix (chr_params [INPUT_MATRIX_PATH_INDEX],
                     my_kpi_range,
                     values.begin());

  if (0!=strcmp ("",chr_params[PROHIBIT_LIST_PATH_INDEX])) {
    read_map (chr_params [PROHIBIT_LIST_PATH_INDEX], 
              prohibit_map);
  }

  if (0!=strcmp ("",chr_params[FORCED_LIST_PATH_INDEX])) {
    read_map (chr_params [FORCED_LIST_PATH_INDEX], 
              forced_map);
  }

  if (0!=strcmp ("",chr_params[FORCED_LIST_PATH_INDEX])) {
    read_dense_matrix (chr_params [KPI_WEIGHTS_PATH_INDEX],
                       full_kpi_range,
                       kpi_weights.begin());
  }


  if (4<int_params[DEBUG_INDEX]) {
    print_matrix (values.begin(), 
                  int_params[NUM_INTERVALS_INDEX], 
                  my_kpi_range.second- my_kpi_range.first, 
                  "A");
    
    print_map (prohibit_map.begin(),
               prohibit_map.end(),
               "PROHIBIT");

    print_map (forced_map.begin(),
               forced_map.end(),
               "FORCED");
  }

#if USE_PFUNC
  /**
   * Define the PFunc instance. Note that we HAVE TO USE PFUNC::USE_DEFAULT as
   * the type of the FUNCTOR so that we can use pfunc::parallel_reduce.
   */
  typedef
  pfunc::generator <pfunc::cilkS, /* Cilk-style scheduling */
                    pfunc::use_default, /* No task priorities needed */
                    pfunc::use_default /* any function type*/> generator_type;
  typedef generator_type::attribute attribute;
  typedef generator_type::task task;
  typedef generator_type::taskmgr taskmgr;

  /* Create an instance of PFunc if that is what is needed */
  taskmgr* global_taskmgr;
  const int n_queues = int_params [NUM_THREADS_INDEX];
  unsigned int* thds_per_q_arr = new unsigned int [n_queues];
  for (int i=0; i<n_queues; ++i) thds_per_q_arr [i] = ONE_STEP;
  global_taskmgr = new taskmgr (n_queues, thds_per_q_arr);
  delete [] thds_per_q_arr;

  /* Create a task handle for all the tasks that we will use */
    task root_task;
    attribute root_attribute (false /*nested*/, false /*grouped*/);
#endif

  /*************************************************************************/
  /*           Set the base case size for all the tasks                    */
  pfunc::space_1D::base_case_size = int_params [TASK_SIZE_INDEX];
  
  /*************************************************************************/
  /* Create a range mapper that knows about the ownership of each column */
  std::vector<int> column_intervals (mpi_size+1);
  partitioner_t<int>::intervals (0, 
                                 int_params[NUM_KPIS_INDEX], 
                                 mpi_size, 
                                 column_intervals.begin());
  typedef interval_mapper_t<std::vector<int> > interval_mapper_t;
  interval_mapper_t interval_mapper (column_intervals);

  /* Populate the data frame with the given input matrix */
  data_frame_t<double> data_frame (my_kpi_range.first,
                                   int_params [NUM_INTERVALS_INDEX],
                                   my_kpi_range.second-my_kpi_range.first,
                                   int_params [LAG_INDEX]);
  data_frame.set (values.begin(), values.end(), true);

  /* Compute the mean and the length of each of the materialized X columns */
  double normalization_time = micro_time ();
  typedef normalizer_t <data_frame_t<double>, 
                        identity_mapper_t<int> > my_normalizer_t;
  identity_mapper_t<int> identity_mapper;
  my_normalizer_t normalizer (&data_frame, &identity_mapper);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_normalizer_t, pfunc::space_1D>
    normalize (kpi_space, normalizer, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, normalize);
  pfunc::wait (*global_taskmgr, root_task);
#else
  normalizer (kpi_space);
#endif
  normalization_time = micro_time() - normalization_time;

  /*************************************************************************/
  /*  Rule out all the candidates that have no variation in their columns  */
  double selection_time = micro_time ();
  typedef selector_t <data_frame_t<double>, 
                      int_set_t,
                      identity_mapper_t<int> > my_selector_t;
  my_selector_t selector (&data_frame, &identity_mapper);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_selector_t, pfunc::space_1D>
    select (kpi_space, selector, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, select);
  pfunc::wait (*global_taskmgr, root_task);
#else
  selector (kpi_space);
#endif
  selection_time = micro_time() - selection_time;

  /*************************************************************************/
  /*  Factorize all the columns so that Xg'Xg is formed and ready to go    */
  double factorization_time = micro_time ();
  typedef factorizer_t <data_frame_t<double>, 
                        std::vector<double>,
                        identity_mapper_t<int>,
                        SolverType> my_factorizer_t;
  my_factorizer_t factorizer (&data_frame, 
                              &identity_mapper,
                              int_params[NUM_INTERVALS_INDEX]-
                              int_params[LAG_INDEX],
                              int_params[LAG_INDEX],
                              dbl_params[LAMBDA_RIDGE_INDEX]);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_factorizer_t, pfunc::space_1D>
    factorize (kpi_space, factorizer, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, factorize);
  pfunc::wait (*global_taskmgr, root_task);
#else
  factorizer (kpi_space);
#endif
  factorization_time = micro_time() - factorization_time;

  /*************************************************************************/
  double total_time = 0.0;
  random_filter_t<int> filter (int_params[RAND_SEED_INDEX],
                               dbl_params[SAMPLE_RATIO_INDEX]);

  /* For each KPI, build model and output it one by one */
  int num_kpis_processed = 0;
  for (int kpi=0; kpi<int_params[NUM_KPIS_INDEX]; ++kpi) { 

    /**
     * We need to figure out if this is a useless kpi, in which case, we 
     * will not bother with trying to form a model for this kpi. All we 
     * need to do is a BROADCAST from from the OWNER of this particular kpi.
     */
    int my_vote = 0; /* process */
    int result;
    if (false==filter(kpi) ||
        (selector.get_list().end()!=selector.get_list().find(kpi)))my_vote=1;
    MPI_Allreduce (&my_vote,
                   &result, 
                   1, 
                   MPI_INT, 
                   MPI_MAX, /*If there is a single 1 --- we all ranks get 1*/
                   MPI_COMM_WORLD);
    if (1 == result) continue;

    /* we are processing */
    ++num_kpis_processed;

    const int num_rows = (int_params[NUM_INTERVALS_INDEX]-
                          int_params[LAG_INDEX]);

    /* Populate 'y' */
    std::vector<double> y (num_rows);
    const int owner = interval_mapper (kpi);
    if (mpi_rank == owner) data_frame.materialize_Y (kpi, y.begin());
    MPI_Bcast (&(y[0]), num_rows, MPI_DOUBLE, owner, MPI_COMM_WORLD);

    /* 
     * Create space for 'beta'. As we are modeling a normalized and centered X
     * with normalized 'Y', we do not have to worry about the intercept --- we
     * simply need enough space for the coefficients --- (M-L). The length of
     * each beta is at most MAX_ITERS * LAG 
     */
    std::vector<double> beta (int_params[MAX_ITERS_INDEX] * 
                              int_params[LAG_INDEX]);

    /* Instantiate the modeler */
    typedef std::less<double> compare_t;
    typedef modeler_t<data_frame_t<double>, /* type for the data_frame */
                      std::vector<double>,  /* type for Y and BETA */
                      std::vector<int>,     /*type for storing KPI predictors*/
                      int_set_t,            /* type for FORCED and PROHIBIT */
                      SolverType,           /* type for the solver */
                      stopper_t,            /* stopping functor */
                      compare_t,            /* comparison operator */
                      interval_mapper_t,    /* determine ownership */
                      my_factorizer_t       /* type of factorizer */
#if USE_PFUNC
                      , generator_type      /* the generator type */
#endif
                      > my_modeler_t;
                      
    const double stop_factor =
      (STOP_ON_OBJ_GAIN==int_params[STOPPING_CRITERIA_INDEX]) ?
         dbl_params[MIN_OBJ_GAIN_INDEX]:dbl_params[MIN_BIC_GAIN_INDEX];
    const stopper_t stopper (stop_factor, int_params[STOPPING_CRITERIA_INDEX]); 

    /* Create a map of the prohibited regressors for this KPI */
    int_set_t prohibit_set;
    if (prohibit_map.end() != prohibit_map.find(kpi)) {
      prohibit_set.insert ((prohibit_map[kpi]).begin(),
                           (prohibit_map[kpi]).end());
    }
    /* Insert the candidates that we don't want screened */
    prohibit_set.insert (selector.get_list().begin(),
                         selector.get_list().end());

    /* Create a map of the forced regressors for this KPI */
    int_set_t forced_set;
    if (forced_map.end() != forced_map.find(kpi)) {
      forced_set.insert ((forced_map[kpi]).begin(),
                         (forced_map[kpi]).end());
    }

    /* Create an instance of the modeler */
    std::vector<int> selected;
    double variance;
    double intercept;
    my_modeler_t my_modeler (data_frame,  /* data frame */
                             y,           /* regressor */
                             beta,        /* the output */
                             selected,    /* the selected KPIs in order */
                             prohibit_set,/* prohibited regressors */
                             forced_set,  /* forced regressors */
                             kpi_weights, /* weights to use for each kpi */
                             variance,    /* variance */
                             intercept,   /* intercept */
                             kpi,         /* target */
                             stopper,     /* stopping criteria */
                             interval_mapper, /* determine ownership */
                             factorizer,  /* factorizer for Xg'Xg */
                             dbl_params[LAMBDA_RIDGE_INDEX], /*ridge penalty*/
                             num_rows, /* num rows */
                             int_params[LAG_INDEX],  /* num columns */  
                             int_params[MAX_ITERS_INDEX],
                             int_params[DEBUG_INDEX]
#if USE_PFUNC
                             ,global_taskmgr /* task manager for pfunc */
#endif
                             );

    /* Let the model compute */
    double time = micro_time ();
    my_modeler ();
    time = micro_time () - time;
    total_time += time;

    /* Print out the coefficients if asked for */
    if (ROOT==mpi_rank && 1<int_params[DEBUG_INDEX]) {
      printf ("Model for KPI %d (Variance=%lf, Intercept=%lf)\n", 
                                          kpi, variance, intercept);
      for (size_t i=0;i<selected.size();++i) {
        printf("%d (",selected[i]);
        for (int j=0; j<int_params[LAG_INDEX]; ++j) {
          printf ("%lf", beta[i*int_params[LAG_INDEX]+j]); 
          if (j!=(int_params[LAG_INDEX]-1)) printf(",");
        }
        printf(")\n");
      }
    }

    /* Print out the coefficients to file if asked for */
    if (ROOT==mpi_rank && 0<int_params[WRITE_FILES_INDEX]) {
	    const std::string base_dir    = chr_params[OUTPUT_FILE_PATH_INDEX];
	    const std::string par_path    = base_dir +  "/parents.txt";
	    const std::string coeffs_path = base_dir +  "/coeffs.txt";
	    const std::string var_path    = base_dir +  "/variance.txt";
	    const std::string int_path    = base_dir +  "/intercept.txt";

      std::ofstream par_file (par_path.c_str(), std::ios_base::app); 
      std::ofstream coeffs_file (coeffs_path.c_str(), std::ios_base::app); 
      std::ofstream var_file (var_path.c_str(), std::ios_base::app); 
      std::ofstream int_file (int_path.c_str(), std::ios_base::app);

      par_file << kpi << ":";
      coeffs_file << kpi << ":";
      var_file << kpi << ":";
      int_file << kpi << ":";

      for (size_t i=0;i<selected.size();++i) {
        par_file << selected[i] << " ";
        for (int j=0; j<int_params[LAG_INDEX]; ++j)
          coeffs_file << beta[i*int_params[LAG_INDEX]+j] << " ";
      }

      var_file << variance;
      int_file << intercept;

      par_file << "\n";
      coeffs_file << "\n";
      var_file << "\n";
      int_file << "\n";

      par_file.close();
      coeffs_file.close();
      var_file.close();
      int_file.close();
    }
  }

  if (ROOT==mpi_rank) 
    printf ("Built %d models in %lf (secs) at rate of %lf (per sec)\n",
      num_kpis_processed, total_time, total_time/num_kpis_processed);

#if USE_PFUNC
    delete global_taskmgr;
#endif

  /* Finalize MPI */
  MPI_Finalize ();

  return 0;
}