Пример #1
0
  /**
   * Return the average value of a column, which might be lagged or unlagged
   */
  double get_average (const int candidate, const int lag_index=-1) {
    /* Rank that owns the column belonging to this candidate. */
    const int root_rank = interval_mapper (candidate);

    /*
     * Only the owning rank computes the value locally. On every other rank
     * the variable stays unset until the broadcast below fills it in.
     */
    double column_mean;
    if (root_rank == mpi_rank) {
      column_mean = A.average (candidate, lag_index);
    }

    /* Collective call: the owner's value is broadcast across to every rank. */
    MPI_Bcast (&column_mean, 1, MPI_DOUBLE, root_rank, MPI_COMM_WORLD);
    return column_mean;
  }
Пример #2
0
  /**
   * There is a set of selected time-series for the current regressor (y).
   * This routine gathers the columns of the newly selected candidate into
   * test_A on every rank so that the linear system can then be solved.
   * Variables A, y, x, and r are class variables.
   *
   * @param[in] candidate The currently added candidate.
   *
   * CAVEAT: The assumption is that all the candidates selected prior to the 
   * current candidate are already in selected.
   */
  void add_group (const int candidate) {
    /*
     * Every previously selected candidate occupies (M*N) doubles in test_A,
     * so the slot for the new candidate begins right after those.
     */
    double* slot = &(test_A[0]) + (selected.size()*(M*N));

    /* The owning rank fills the slot; all other ranks receive it below. */
    const int root_rank = interval_mapper (candidate);
    if (root_rank == mpi_rank) {
      A.materialize_X (candidate, slot);
    }

    /* Collective broadcast of the (M*N) doubles from the owner to all ranks */
    MPI_Bcast (slot, (M*N), MPI_DOUBLE, root_rank, MPI_COMM_WORLD);
  }
Пример #3
0
	// Recomputes the per-cascade depth intervals, scales and biases for this frame.
	//
	// Two code paths exist, selected by cs_support_:
	//   - a compute path that dispatches a chain of techniques and reads the resulting
	//     interval/scale/bias buffers back through CPU-side buffers;
	//   - a post-process path that reduces the depth bounds through the mip chain, reads
	//     the intervals back from a CPU-side texture and derives scale/bias on the CPU.
	// Either way the results are stored in intervals_, scales_ and biases_, after which
	// UpdateCropMats() is called and frame_index_ is incremented.
	//
	// camera:             supplies the near/far planes and the inverse projection / inverse view matrices.
	// light_view_proj:    multiplied with the camera's inverse view matrix (compute path) or passed to
	//                     CalcFrustumExtents (post-process path).
	// light_space_border: margin added around each cascade's bounds; its x/y also limit the cascade scale
	//                     on the compute path.
	void SDSMCascadedShadowLayer::UpdateCascades(Camera const & camera, float4x4 const & light_view_proj,
			float3 const & light_space_border)
	{
		RenderFactory& rf = Context::Instance().RenderFactoryInstance();
		RenderEngine& re = rf.RenderEngineInstance();

		uint32_t const num_cascades = static_cast<uint32_t>(intervals_.size());
		// Two CPU-side read-back slots are used; the slot written this frame alternates between 0 and 1.
		uint32_t const copy_index = frame_index_ & 1;
		// Read from the other slot, except on frame 0 where only the slot just written exists.
		// NOTE(review): presumably this reads the previous frame's results to avoid waiting on this
		// frame's copy - confirm.
		uint32_t const read_back_index = (0 == frame_index_) ? copy_index : !copy_index;

		if (cs_support_)
		{
			// Bind a default-constructed FrameBufferPtr before dispatching.
			// NOTE(review): presumably this unbinds the current render targets - confirm.
			re.BindFrameBuffer(FrameBufferPtr());

			// Upper limit for the x/y cascade scale, derived from the border; z is left unbounded.
			float max_blur_light_space = 8.0f / 1024;
			float3 max_cascade_scale(max_blur_light_space / light_space_border.x(),
				max_blur_light_space / light_space_border.y(),
				std::numeric_limits<float>::max());

			int const TILE_DIM = 128;

			// Number of TILE_DIM x TILE_DIM tiles needed to cover the depth texture (rounded up).
			int dispatch_x = (depth_tex_->Width(0) + TILE_DIM - 1) / TILE_DIM;
			int dispatch_y = (depth_tex_->Height(0) + TILE_DIM - 1) / TILE_DIM;

			// The same GPU buffers are bound to several effect parameters.
			*interval_buff_param_ = interval_buff_;
			*interval_buff_uint_param_ = interval_buff_;
			*interval_buff_read_param_ = interval_buff_;
			*cascade_min_buff_uint_param_ = cascade_min_buff_;
			*cascade_max_buff_uint_param_ = cascade_max_buff_;
			*cascade_min_buff_read_param_ = cascade_min_buff_;
			*cascade_max_buff_read_param_ = cascade_max_buff_;
			*scale_buff_param_ = scale_buff_;
			*bias_buff_param_ = bias_buff_;
			*depth_tex_param_ = depth_tex_;
			*num_cascades_param_ = static_cast<int32_t>(num_cascades);
			*inv_depth_width_height_param_ = float2(1.0f / depth_tex_->Width(0), 1.0f / depth_tex_->Height(0));
			*near_far_param_ = float2(camera.NearPlane(), camera.FarPlane());
			// Transform three corners at z = 1 by the inverse projection; the upper-left corner
			// plus the x/y extents between the corners are passed to the shaders.
			float4x4 const & inv_proj = camera.InverseProjMatrix();
			float3 upper_left = MathLib::transform_coord(float3(-1, +1, 1), inv_proj);
			float3 upper_right = MathLib::transform_coord(float3(+1, +1, 1), inv_proj);
			float3 lower_left = MathLib::transform_coord(float3(-1, -1, 1), inv_proj);
			*upper_left_param_ = upper_left;
			*xy_dir_param_ = float2(upper_right.x() - upper_left.x(), lower_left.y() - upper_left.y());
			*view_to_light_view_proj_param_ = camera.InverseViewMatrix() * light_view_proj;
			*light_space_border_param_ = light_space_border;
			*max_cascade_scale_param_ = max_cascade_scale;

			// The dispatch order matters: each stage is issued after the one whose name it builds on
			// (clear -> reduce -> compute for the z bounds, then the same for the cascade bounds).
			re.Dispatch(*clear_z_bounds_tech_, 1, 1, 1);
			re.Dispatch(*reduce_z_bounds_from_depth_tech_, dispatch_x, dispatch_y, 1);
			re.Dispatch(*compute_log_cascades_from_z_bounds_tech_, 1, 1, 1);
			re.Dispatch(*clear_cascade_bounds_tech_, 1, 1, 1);
			re.Dispatch(*reduce_bounds_from_depth_tech_, dispatch_x, dispatch_y, 1);
			re.Dispatch(*compute_custom_cascades_tech_, 1, 1, 1);

			// Copy this frame's results into the CPU-side slot selected by copy_index.
			interval_buff_->CopyToBuffer(*interval_cpu_buffs_[copy_index]);
			scale_buff_->CopyToBuffer(*scale_cpu_buffs_[copy_index]);
			bias_buff_->CopyToBuffer(*bias_cpu_buffs_[copy_index]);

			// Map the read-back slot read-only and pull the values into the member arrays.
			GraphicsBuffer::Mapper interval_mapper(*interval_cpu_buffs_[read_back_index], BA_Read_Only);
			GraphicsBuffer::Mapper scale_mapper(*scale_cpu_buffs_[read_back_index], BA_Read_Only);
			GraphicsBuffer::Mapper bias_mapper(*bias_cpu_buffs_[read_back_index], BA_Read_Only);
			float2* interval_ptr = interval_mapper.Pointer<float2>();
			float3* scale_ptr = scale_mapper.Pointer<float3>();
			float3* bias_ptr = bias_mapper.Pointer<float3>();

			for (size_t i = 0; i < intervals_.size(); ++ i)
			{
				float3 const & scale = scale_ptr[i];
				float3 const & bias = bias_ptr[i];

				intervals_[i] = interval_ptr[i];
				scales_[i] = scale;
				biases_[i] = bias;
			}
		}
		else
		{
			float2 const near_far(camera.NearPlane(), camera.FarPlane());

			// First pass of the z-bounds reduction from the depth input.
			reduce_z_bounds_from_depth_pp_->SetParam(1, near_far);
			reduce_z_bounds_from_depth_pp_->Apply();

			// Walk down the mip chain: each iteration reduces level (i - 1) and stores the result in level i.
			for (uint32_t i = 1; i < depth_deriative_tex_->NumMipMaps(); ++ i)
			{
				int width = depth_deriative_tex_->Width(i - 1);
				int height = depth_deriative_tex_->Height(i - 1);

				// Texel size of the source level, plus a negative half-texel offset.
				float delta_x = 1.0f / width;
				float delta_y = 1.0f / height;
				float4 delta_offset(delta_x, delta_y, -delta_x / 2, -delta_y / 2);			
				reduce_z_bounds_from_depth_mip_map_pp_->SetParam(0, delta_offset);

				// The pass renders into level (i - 1) of the helper texture ...
				reduce_z_bounds_from_depth_mip_map_pp_->OutputPin(0, depth_deriative_small_tex_, i - 1);
				reduce_z_bounds_from_depth_mip_map_pp_->Apply();

				int sw = depth_deriative_tex_->Width(i);
				int sh = depth_deriative_tex_->Height(i);

				// ... and an sw x sh region is then copied into level i of the main texture.
				// NOTE(review): argument order is presumably (target, dst array index, dst level, dst rect,
				// src array index, src level, src rect) - confirm against Texture::CopyToSubTexture2D.
				depth_deriative_small_tex_->CopyToSubTexture2D(*depth_deriative_tex_, 0, i, 0, 0, sw, sh,
					0, i - 1, 0, 0, sw, sh);
			}

			compute_log_cascades_from_z_bounds_pp_->SetParam(1, static_cast<int32_t>(num_cascades));
			compute_log_cascades_from_z_bounds_pp_->SetParam(2, near_far);
			compute_log_cascades_from_z_bounds_pp_->Apply();

			// Copy a num_cascades x 1 region of the interval texture into this frame's CPU-side slot.
			interval_tex_->CopyToSubTexture2D(*interval_cpu_texs_[copy_index], 0, 0, 0, 0, num_cascades, 1,
				0, 0, 0, 0, num_cascades, 1);

			// Map the read-back slot; the texels are read as pairs of half-precision values.
			Texture::Mapper interval_mapper(*interval_cpu_texs_[read_back_index], 0, 0,
				TMA_Read_Only, 0, 0, num_cascades, 1);
			Vector_T<half, 2>* interval_ptr = interval_mapper.Pointer<Vector_T<half, 2> >();

			for (size_t i = 0; i < intervals_.size(); ++ i)
			{
				// Widen the half-precision interval to float.
				float2 const interval(static_cast<float>(interval_ptr[i].x()),
					static_cast<float>(interval_ptr[i].y()));

				AABBox aabb = CalcFrustumExtents(camera, interval.x(), interval.y(), light_view_proj);

				// NOTE(review): operator&= presumably intersects with the [-1, 1] cube - confirm.
				aabb &= AABBox(float3(-1, -1, -1), float3(+1, +1, +1));

				// Grow the box by the border on every side.
				aabb.Min() -= light_space_border;
				aabb.Max() += light_space_border;

				// Remap x and y from the [-1, 1] range to [0, 1]; y is negated in the process.
				aabb.Min().x() = +aabb.Min().x() * 0.5f + 0.5f;
				aabb.Min().y() = -aabb.Min().y() * 0.5f + 0.5f;
				aabb.Max().x() = +aabb.Max().x() * 0.5f + 0.5f;
				aabb.Max().y() = -aabb.Max().y() * 0.5f + 0.5f;

				// Negating y reversed the min/max ordering, so swap them back.
				std::swap(aabb.Min().y(), aabb.Max().y());

				// Scale and bias such that (p * scale + bias) maps Min() to 0 and Max() to 1 per component.
				float3 const scale = float3(1.0f, 1.0f, 1.0f) / (aabb.Max() - aabb.Min());
				float3 const bias = -aabb.Min() * scale;

				intervals_[i] = interval;
				scales_[i] = scale;
				biases_[i] = bias;
			}
		}

		this->UpdateCropMats();

		++ frame_index_;
	}
Пример #4
0
int main (int argc, char** argv) {
  /* Initialize MPI */
  MPI_Init (&argc, &argv);

  /* Figure out the rank and size */
  MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank);
  MPI_Comm_size (MPI_COMM_WORLD, &mpi_size);

  /* MPI sends argc and argv everywhere --- parse everywhere */
  parse_parameters (argc,argv);

  /**
   * Now, we read the input matrix, FORCED, and PROHIBIT maps. To do this
   * we first create a partition of the total space so that we know which
   * range of KPIs is ours. Input matrix is stored per KPI and the maps 
   * are also ordered according to KPIs although not all the KPIs need to
   * be present.
   */
  pfunc::space_1D kpi_space = 
    partitioner_t<int>::create (0, 
                                int_params[NUM_KPIS_INDEX], 
                                mpi_rank, 
                                mpi_size);
  std::pair<int,int> full_kpi_range (0, int_params[NUM_KPIS_INDEX]);
  std::pair<int,int> my_kpi_range (kpi_space.begin(), kpi_space.end());
  std::vector<double> values 
         ((my_kpi_range.second-my_kpi_range.first)*
           int_params [NUM_INTERVALS_INDEX]);
  int_vec_map_t prohibit_map;
  int_vec_map_t forced_map;
  std::vector<double> kpi_weights (int_params[NUM_KPIS_INDEX], 1.0);
                                                         
  read_dense_matrix (chr_params [INPUT_MATRIX_PATH_INDEX],
                     my_kpi_range,
                     values.begin());

  if (0!=strcmp ("",chr_params[PROHIBIT_LIST_PATH_INDEX])) {
    read_map (chr_params [PROHIBIT_LIST_PATH_INDEX], 
              prohibit_map);
  }

  if (0!=strcmp ("",chr_params[FORCED_LIST_PATH_INDEX])) {
    read_map (chr_params [FORCED_LIST_PATH_INDEX], 
              forced_map);
  }

  if (0!=strcmp ("",chr_params[FORCED_LIST_PATH_INDEX])) {
    read_dense_matrix (chr_params [KPI_WEIGHTS_PATH_INDEX],
                       full_kpi_range,
                       kpi_weights.begin());
  }


  if (4<int_params[DEBUG_INDEX]) {
    print_matrix (values.begin(), 
                  int_params[NUM_INTERVALS_INDEX], 
                  my_kpi_range.second- my_kpi_range.first, 
                  "A");
    
    print_map (prohibit_map.begin(),
               prohibit_map.end(),
               "PROHIBIT");

    print_map (forced_map.begin(),
               forced_map.end(),
               "FORCED");
  }

#if USE_PFUNC
  /**
   * Define the PFunc instance. Note that we HAVE TO USE PFUNC::USE_DEFAULT as
   * the type of the FUNCTOR so that we can use pfunc::parallel_reduce.
   */
  typedef
  pfunc::generator <pfunc::cilkS, /* Cilk-style scheduling */
                    pfunc::use_default, /* No task priorities needed */
                    pfunc::use_default /* any function type*/> generator_type;
  typedef generator_type::attribute attribute;
  typedef generator_type::task task;
  typedef generator_type::taskmgr taskmgr;

  /* Create an instance of PFunc if that is what is needed */
  taskmgr* global_taskmgr;
  const int n_queues = int_params [NUM_THREADS_INDEX];
  unsigned int* thds_per_q_arr = new unsigned int [n_queues];
  for (int i=0; i<n_queues; ++i) thds_per_q_arr [i] = ONE_STEP;
  global_taskmgr = new taskmgr (n_queues, thds_per_q_arr);
  delete [] thds_per_q_arr;

  /* Create a task handle for all the tasks that we will use */
    task root_task;
    attribute root_attribute (false /*nested*/, false /*grouped*/);
#endif

  /*************************************************************************/
  /*           Set the base case size for all the tasks                    */
  pfunc::space_1D::base_case_size = int_params [TASK_SIZE_INDEX];
  
  /*************************************************************************/
  /* Create a range mapper that knows about the ownership of each column */
  std::vector<int> column_intervals (mpi_size+1);
  partitioner_t<int>::intervals (0, 
                                 int_params[NUM_KPIS_INDEX], 
                                 mpi_size, 
                                 column_intervals.begin());
  typedef interval_mapper_t<std::vector<int> > interval_mapper_t;
  interval_mapper_t interval_mapper (column_intervals);

  /* Populate the data frame with the given input matrix */
  data_frame_t<double> data_frame (my_kpi_range.first,
                                   int_params [NUM_INTERVALS_INDEX],
                                   my_kpi_range.second-my_kpi_range.first,
                                   int_params [LAG_INDEX]);
  data_frame.set (values.begin(), values.end(), true);

  /* Compute the mean and the length of each of the materialized X columns */
  double normalization_time = micro_time ();
  typedef normalizer_t <data_frame_t<double>, 
                        identity_mapper_t<int> > my_normalizer_t;
  identity_mapper_t<int> identity_mapper;
  my_normalizer_t normalizer (&data_frame, &identity_mapper);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_normalizer_t, pfunc::space_1D>
    normalize (kpi_space, normalizer, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, normalize);
  pfunc::wait (*global_taskmgr, root_task);
#else
  normalizer (kpi_space);
#endif
  normalization_time = micro_time() - normalization_time;

  /*************************************************************************/
  /*  Rule out all the candidates that have no variation in their columns  */
  double selection_time = micro_time ();
  typedef selector_t <data_frame_t<double>, 
                      int_set_t,
                      identity_mapper_t<int> > my_selector_t;
  my_selector_t selector (&data_frame, &identity_mapper);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_selector_t, pfunc::space_1D>
    select (kpi_space, selector, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, select);
  pfunc::wait (*global_taskmgr, root_task);
#else
  selector (kpi_space);
#endif
  selection_time = micro_time() - selection_time;

  /*************************************************************************/
  /*  Factorize all the columns so that Xg'Xg is formed and ready to go    */
  double factorization_time = micro_time ();
  typedef factorizer_t <data_frame_t<double>, 
                        std::vector<double>,
                        identity_mapper_t<int>,
                        SolverType> my_factorizer_t;
  my_factorizer_t factorizer (&data_frame, 
                              &identity_mapper,
                              int_params[NUM_INTERVALS_INDEX]-
                              int_params[LAG_INDEX],
                              int_params[LAG_INDEX],
                              dbl_params[LAMBDA_RIDGE_INDEX]);

#if USE_PFUNC
  pfunc::parallel_reduce<generator_type, my_factorizer_t, pfunc::space_1D>
    factorize (kpi_space, factorizer, *global_taskmgr);
  pfunc::spawn (*global_taskmgr, root_task, root_attribute, factorize);
  pfunc::wait (*global_taskmgr, root_task);
#else
  factorizer (kpi_space);
#endif
  factorization_time = micro_time() - factorization_time;

  /*************************************************************************/
  double total_time = 0.0;
  random_filter_t<int> filter (int_params[RAND_SEED_INDEX],
                               dbl_params[SAMPLE_RATIO_INDEX]);

  /* For each KPI, build model and output it one by one */
  int num_kpis_processed = 0;
  for (int kpi=0; kpi<int_params[NUM_KPIS_INDEX]; ++kpi) { 

    /**
     * We need to figure out if this is a useless kpi, in which case, we 
     * will not bother with trying to form a model for this kpi. All we 
     * need to do is a BROADCAST from from the OWNER of this particular kpi.
     */
    int my_vote = 0; /* process */
    int result;
    if (false==filter(kpi) ||
        (selector.get_list().end()!=selector.get_list().find(kpi)))my_vote=1;
    MPI_Allreduce (&my_vote,
                   &result, 
                   1, 
                   MPI_INT, 
                   MPI_MAX, /*If there is a single 1 --- we all ranks get 1*/
                   MPI_COMM_WORLD);
    if (1 == result) continue;

    /* we are processing */
    ++num_kpis_processed;

    const int num_rows = (int_params[NUM_INTERVALS_INDEX]-
                          int_params[LAG_INDEX]);

    /* Populate 'y' */
    std::vector<double> y (num_rows);
    const int owner = interval_mapper (kpi);
    if (mpi_rank == owner) data_frame.materialize_Y (kpi, y.begin());
    MPI_Bcast (&(y[0]), num_rows, MPI_DOUBLE, owner, MPI_COMM_WORLD);

    /* 
     * Create space for 'beta'. As we are modeling a normalized and centered X
     * with normalized 'Y', we do not have to worry about the intercept --- we
     * simply need enough space for the coefficients --- (M-L). The length of
     * each beta is at most MAX_ITERS * LAG 
     */
    std::vector<double> beta (int_params[MAX_ITERS_INDEX] * 
                              int_params[LAG_INDEX]);

    /* Instantiate the modeler */
    typedef std::less<double> compare_t;
    typedef modeler_t<data_frame_t<double>, /* type for the data_frame */
                      std::vector<double>,  /* type for Y and BETA */
                      std::vector<int>,     /*type for storing KPI predictors*/
                      int_set_t,            /* type for FORCED and PROHIBIT */
                      SolverType,           /* type for the solver */
                      stopper_t,            /* stopping functor */
                      compare_t,            /* comparison operator */
                      interval_mapper_t,    /* determine ownership */
                      my_factorizer_t       /* type of factorizer */
#if USE_PFUNC
                      , generator_type      /* the generator type */
#endif
                      > my_modeler_t;
                      
    const double stop_factor =
      (STOP_ON_OBJ_GAIN==int_params[STOPPING_CRITERIA_INDEX]) ?
         dbl_params[MIN_OBJ_GAIN_INDEX]:dbl_params[MIN_BIC_GAIN_INDEX];
    const stopper_t stopper (stop_factor, int_params[STOPPING_CRITERIA_INDEX]); 

    /* Create a map of the prohibited regressors for this KPI */
    int_set_t prohibit_set;
    if (prohibit_map.end() != prohibit_map.find(kpi)) {
      prohibit_set.insert ((prohibit_map[kpi]).begin(),
                           (prohibit_map[kpi]).end());
    }
    /* Insert the candidates that we don't want screened */
    prohibit_set.insert (selector.get_list().begin(),
                         selector.get_list().end());

    /* Create a map of the forced regressors for this KPI */
    int_set_t forced_set;
    if (forced_map.end() != forced_map.find(kpi)) {
      forced_set.insert ((forced_map[kpi]).begin(),
                         (forced_map[kpi]).end());
    }

    /* Create an instance of the modeler */
    std::vector<int> selected;
    double variance;
    double intercept;
    my_modeler_t my_modeler (data_frame,  /* data frame */
                             y,           /* regressor */
                             beta,        /* the output */
                             selected,    /* the selected KPIs in order */
                             prohibit_set,/* prohibited regressors */
                             forced_set,  /* forced regressors */
                             kpi_weights, /* weights to use for each kpi */
                             variance,    /* variance */
                             intercept,   /* intercept */
                             kpi,         /* target */
                             stopper,     /* stopping criteria */
                             interval_mapper, /* determine ownership */
                             factorizer,  /* factorizer for Xg'Xg */
                             dbl_params[LAMBDA_RIDGE_INDEX], /*ridge penalty*/
                             num_rows, /* num rows */
                             int_params[LAG_INDEX],  /* num columns */  
                             int_params[MAX_ITERS_INDEX],
                             int_params[DEBUG_INDEX]
#if USE_PFUNC
                             ,global_taskmgr /* task manager for pfunc */
#endif
                             );

    /* Let the model compute */
    double time = micro_time ();
    my_modeler ();
    time = micro_time () - time;
    total_time += time;

    /* Print out the coefficients if asked for */
    if (ROOT==mpi_rank && 1<int_params[DEBUG_INDEX]) {
      printf ("Model for KPI %d (Variance=%lf, Intercept=%lf)\n", 
                                          kpi, variance, intercept);
      for (size_t i=0;i<selected.size();++i) {
        printf("%d (",selected[i]);
        for (int j=0; j<int_params[LAG_INDEX]; ++j) {
          printf ("%lf", beta[i*int_params[LAG_INDEX]+j]); 
          if (j!=(int_params[LAG_INDEX]-1)) printf(",");
        }
        printf(")\n");
      }
    }

    /* Print out the coefficients to file if asked for */
    if (ROOT==mpi_rank && 0<int_params[WRITE_FILES_INDEX]) {
	    const std::string base_dir    = chr_params[OUTPUT_FILE_PATH_INDEX];
	    const std::string par_path    = base_dir +  "/parents.txt";
	    const std::string coeffs_path = base_dir +  "/coeffs.txt";
	    const std::string var_path    = base_dir +  "/variance.txt";
	    const std::string int_path    = base_dir +  "/intercept.txt";

      std::ofstream par_file (par_path.c_str(), std::ios_base::app); 
      std::ofstream coeffs_file (coeffs_path.c_str(), std::ios_base::app); 
      std::ofstream var_file (var_path.c_str(), std::ios_base::app); 
      std::ofstream int_file (int_path.c_str(), std::ios_base::app);

      par_file << kpi << ":";
      coeffs_file << kpi << ":";
      var_file << kpi << ":";
      int_file << kpi << ":";

      for (size_t i=0;i<selected.size();++i) {
        par_file << selected[i] << " ";
        for (int j=0; j<int_params[LAG_INDEX]; ++j)
          coeffs_file << beta[i*int_params[LAG_INDEX]+j] << " ";
      }

      var_file << variance;
      int_file << intercept;

      par_file << "\n";
      coeffs_file << "\n";
      var_file << "\n";
      int_file << "\n";

      par_file.close();
      coeffs_file.close();
      var_file.close();
      int_file.close();
    }
  }

  if (ROOT==mpi_rank) 
    printf ("Built %d models in %lf (secs) at rate of %lf (per sec)\n",
      num_kpis_processed, total_time, total_time/num_kpis_processed);

#if USE_PFUNC
    delete global_taskmgr;
#endif

  /* Finalize MPI */
  MPI_Finalize ();

  return 0;
}