Пример #1
0
void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){

	//std::cout << "Training" << std::endl;

	//	c_width = (float*) malloc(sizeof(float)*network_size);
	//	memset(c_width,0,sizeof(float)*network_size);

	DeviceMatrix<float> device_X(Input);

	//std::cout << "KMeans" << std::endl;	
	clock_t initialTime = clock();
	KMeans KM;
	KM.SetSeed(seed);
	dCenters = KM.Execute(device_X,network_size);

	cudaThreadSynchronize();
	times[0] = (clock() - initialTime);

	//std::cout << "Adjust Widths" << std::endl;
	/*Adjust width using mean of distance to neighbours*/
	initialTime = clock();
	AdjustWidths(number_neighbours);

	cudaThreadSynchronize();
	times[1] = (clock() - initialTime);

	/*Training weights and scaling factor*/
	HostMatrix<float> TargetArr(Target.Rows(),NumClasses);
	memset(TargetArr.Pointer(),0,sizeof(float)*TargetArr.Elements());

	for(int i = 0; i < Target.Rows(); i++){
		TargetArr(i,((int)Target(i,0)-1)) = 1;
	}

	DeviceMatrix<float> d_Target(TargetArr);

	//std::cout << "Calculating Weights" << std::endl;

	initialTime = clock();

	DeviceMatrix<float> device_activ_matrix(device_X.Rows(),dCenters.Rows(),ColumnMajor);

	KernelActivationMatrix(device_activ_matrix.Pointer(),device_X.Pointer(),dCenters.Pointer(),device_X.Columns(),dCenters.Columns(),device_activ_matrix.Columns(),device_activ_matrix.Rows(),scaling_factor,device_c_width.Pointer());

	DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix);

	dWeights = DeviceMatrix<float>(d_Aplus.Rows(),d_Target.Columns());

	d_Aplus.Multiply(d_Aplus,d_Target,dWeights);


	/*Return Weights and Centers*/
	cudaThreadSynchronize();
	times[2] = (clock() - initialTime);

	// cudaMemcpy(c_width,device_c_width.Pointer(),sizeof(float)*device_c_width.Length(),cudaMemcpyDeviceToHost);
	//	this->Weights = HostMatrix<float>(dWeights);		
	//	this->Centers = HostMatrix<float>(dCenters);

}
Пример #2
0
void writeHeader(HostMatrix<float> &Input, int number_classes, int seed){

	cout << "\n=== Run information ===\n\n";

	cout << std::left << setw(50) << "Random Seed" << std::left << setw(20) << seed << endl;
	cout << std::left << setw(50) << "Number of Basis Functions" << std::left << setw(20) << NETWORK_SIZE << endl;
	cout << std::left << setw(50) << "Number of Neighbours for Width Estimation" << std::left << setw(20) << RNEIGHBOURS << endl;
	cout << std::left << setw(50) << "Number of Folds" << std::left << setw(20) << KFOLDS << endl;
	cout << std::left << setw(50) << "Number of Attributes" << std::left << setw(20) << Input.Columns() << endl;
	cout << std::left << setw(50) << "Number of Classes" << std::left << setw(20) << number_classes << endl;
	cout << std::left  << setw(50) << "Number of Instances" << std::left << setw(20) << Input.Rows() << endl;

}
void OCLAcceleratorMatrixBCSR<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) {

  const HostMatrixBCSR<ValueType> *cast_mat;

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  // CPU to OCL copy
  if ((cast_mat = dynamic_cast<const HostMatrixBCSR<ValueType>*> (&src)) != NULL) {
    
  if (this->get_nnz() == 0)
    this->AllocateBCSR(src.get_nnz(), src.get_nrow(), src.get_ncol() );

    assert((this->get_nnz()  == src.get_nnz())  &&
	   (this->get_nrow() == src.get_nrow()) &&
	   (this->get_ncol() == src.get_ncol()) );

    cast_mat->get_nnz();

    FATAL_ERROR(__FILE__, __LINE__);    
    
  } else {
    
    LOG_INFO("Error unsupported OCL matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);
    
  }

}
Пример #4
0
/*Calculate the root mean square error of outputs against targets*/
double rmse_error(HostMatrix<float> &Target,HostMatrix<float> &Output){

	//check global error
	float sum = 0;
	int i;

	for(i = 0; i < Target.Rows(); i++){

		sum = sum + pow(Target(i,0) - Output(i,0),2);		

	}

	return sqrt(sum/Target.Rows());
}
Пример #5
0
void MICAcceleratorMatrixHYB<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) {

  const HostMatrixHYB<ValueType> *cast_mat;

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  // CPU to MIC copy
  if ((cast_mat = dynamic_cast<const HostMatrixHYB<ValueType>*> (&src)) != NULL) {
    
  if (this->get_nnz() == 0)
    this->AllocateHYB(cast_mat->get_ell_nnz(), cast_mat->get_coo_nnz(), cast_mat->get_ell_max_row(),
                      cast_mat->get_nrow(), cast_mat->get_ncol());

    assert((this->get_nnz()  == src.get_nnz())  &&
	   (this->get_nrow() == src.get_nrow()) &&
	   (this->get_ncol() == src.get_ncol()) );

    if (this->get_ell_nnz() > 0) {

      copy_to_mic(this->local_backend_.MIC_dev,
		  cast_mat->mat_.ELL.val, this->mat_.ELL.val, this->get_ell_nnz());
      copy_to_mic(this->local_backend_.MIC_dev,
		  cast_mat->mat_.ELL.col, this->mat_.ELL.col, this->get_ell_nnz());
    }
    
    if (this->get_coo_nnz() > 0) {

      copy_to_mic(this->local_backend_.MIC_dev,
		  cast_mat->mat_.COO.row, this->mat_.COO.row, this->get_coo_nnz());
      copy_to_mic(this->local_backend_.MIC_dev,
		  cast_mat->mat_.COO.col, this->mat_.COO.col, this->get_coo_nnz());
      copy_to_mic(this->local_backend_.MIC_dev,
		  cast_mat->mat_.COO.val, this->mat_.COO.val, this->get_coo_nnz());

    }

  } else {
    
    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);
    
  }

}
Пример #6
0
void MICAcceleratorMatrixDIA<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) {

  const HostMatrixDIA<ValueType> *cast_mat;

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  // CPU to MIC copy
  if ((cast_mat = dynamic_cast<const HostMatrixDIA<ValueType>*> (&src)) != NULL) {
    
  if (this->get_nnz() == 0)
    this->AllocateDIA(cast_mat->get_nnz(), cast_mat->get_nrow(), cast_mat->get_ncol(), cast_mat->get_ndiag());

    assert((this->get_nnz()  == src.get_nnz())  &&
	   (this->get_nrow() == src.get_nrow()) &&
	   (this->get_ncol() == src.get_ncol()) );

    if (this->get_nnz() > 0) {

      copy_to_mic(cast_mat->mat_.val, this->mat_.val, this->get_nnz());
      copy_to_mic(cast_mat->mat_.offset, this->mat_.offset, this->mat_.num_diag);

      /*
      // TODO

      for (int j=0; j<this->get_nnz(); ++j)
        this->mat_.val[j] = cast_mat->mat_.val[j];
      
      for (int j=0; j<this->mat_.num_diag; ++j)
        this->mat_.offset[j] = cast_mat->mat_.offset[j];
      */

    }
      
  } else {
    
    LOG_INFO("Error unsupported MIC matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);
    
  }

}
Пример #7
0
void OCLAcceleratorMatrixCOO<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) {

  const HostMatrixCOO<ValueType> *cast_mat;

  // copy only in the same format
  assert(this->get_mat_format() == src.get_mat_format());

  // CPU to OCL copy
  if ((cast_mat = dynamic_cast<const HostMatrixCOO<ValueType>*> (&src)) != NULL) {
    
    if (this->get_nnz() == 0)
      this->AllocateCOO(src.get_nnz(), src.get_nrow(), src.get_ncol() );

    if (this->get_nnz() > 0) {

      assert((this->get_nnz()  == src.get_nnz())  &&
             (this->get_nrow() == src.get_nrow()) &&
             (this->get_ncol() == src.get_ncol()) );

      // Copy object from host to device memory
      ocl_host2dev<int>(this->get_nnz(), // size
                        cast_mat->mat_.row, // src
                        this->mat_.row,     // dst
                        OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      // Copy object from host to device memory
      ocl_host2dev<int>(this->get_nnz(), // size
                        cast_mat->mat_.col, // src
                        this->mat_.col,     // dst
                        OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

      // Copy object from host to device memory
      ocl_host2dev<ValueType>(this->get_nnz(), // size
                              cast_mat->mat_.val, // src
                              this->mat_.val,     // dst
                              OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue );

    }
    
  } else {
    
    LOG_INFO("Error unsupported OCL matrix type");
    this->info();
    src.info();
    FATAL_ERROR(__FILE__, __LINE__);
    
  }

}
Пример #8
0
 HostMatrix<T> operator*(const HostMatrix<T>& m)
 {
     HostMatrix<T> mult(m.width_,height_);
     for(int row=0; row<mult.height_; ++row)
     for(int col=0; col<mult.width_; ++col)
     {
         T v = 0.0;
         for(int k=0; k<width_; ++k)
         {
             v+=getData(row,k)*m.getData(k,col);
         }
         mult.setData(row,col,v);
     }
     return mult;
 }
Пример #9
0
/*Count number of miscalculated outputs against targets*/
int error_calc(HostMatrix<float> &Target,HostMatrix<float> &Output){

	//check global error

	int i;

	int error = 0;

	for(i = 0; i < Target.Rows(); i++){

		if(Target(i,0) - Output(i,0) != 0){
			error += 1;
		}

	}

	return error;
}
Пример #10
0
void PointGroup<scalar_type>::solve(Timers& timers, bool compute_rmm, bool lda, bool compute_forces, bool compute_energy,
                                    double& energy, double* fort_forces_ptr)
{
  HostMatrix<scalar_type> rmm_output;
  uint group_m = total_functions();
  if (compute_rmm) { rmm_output.resize(group_m, group_m); rmm_output.zero(); }

  #if CPU_RECOMPUTE
  /** Compute functions **/
  timers.functions.start();
  compute_functions(compute_forces, !lda);
  timers.functions.pause();
  #endif

  // prepare rmm_input for this group
  timers.density.start();
  HostMatrix<scalar_type> rmm_input(group_m, group_m);
  get_rmm_input(rmm_input);
  timers.density.pause();

  HostMatrix<vec_type3> forces(total_nucleii(), 1); forces.zero();
  HostMatrix<vec_type3> dd;

  /******** each point *******/
  uint point = 0;
  for (list<Point>::const_iterator point_it = points.begin(); point_it != points.end(); ++point_it, ++point)
  {
    timers.density.start();

    /** density **/
    scalar_type partial_density = 0;
    vec_type3 dxyz(0,0,0);
    vec_type3 dd1(0,0,0);
    vec_type3 dd2(0,0,0);

    if (lda) {
      for (uint i = 0; i < group_m; i++) {
        float w = 0.0;
        float Fi = function_values(i, point);
        for (uint j = i; j < group_m; j++) {
          scalar_type Fj = function_values(j, point);
          w += rmm_input(j, i) * Fj;
        }
        partial_density += Fi * w;
      }
    }
    else {
      for (uint i = 0; i < group_m; i++) {
        float w = 0.0;
        vec_type3 w3(0,0,0);
        vec_type3 ww1(0,0,0);
        vec_type3 ww2(0,0,0);

        scalar_type Fi = function_values(i, point);
        vec_type3 Fgi(gradient_values(i, point));
        vec_type3 Fhi1(hessian_values(2 * (i + 0) + 0, point));
        vec_type3 Fhi2(hessian_values(2 * (i + 0) + 1, point));

        for (uint j = 0; j <= i; j++) {
          scalar_type rmm = rmm_input(j,i);
          scalar_type Fj = function_values(j, point);
          w += Fj * rmm;

          vec_type3 Fgj(gradient_values(j, point));
          w3 += Fgj * rmm;

          vec_type3 Fhj1(hessian_values(2 * (j + 0) + 0, point));
          vec_type3 Fhj2(hessian_values(2 * (j + 0) + 1, point));
          ww1 += Fhj1 * rmm;
          ww2 += Fhj2 * rmm;
        }
        partial_density += Fi * w;

        dxyz += Fgi * w + w3 * Fi;
        dd1 += Fgi * w3 * 2 + Fhi1 * w + ww1 * Fi;

        vec_type3 FgXXY(Fgi.x(), Fgi.x(), Fgi.y());
        vec_type3 w3YZZ(w3.y(), w3.z(), w3.z());
        vec_type3 FgiYZZ(Fgi.y(), Fgi.z(), Fgi.z());
        vec_type3 w3XXY(w3.x(), w3.x(), w3.y());

        dd2 += FgXXY * w3YZZ + FgiYZZ * w3XXY + Fhi2 * w + ww2 * Fi;
      }
    }
    timers.density.pause();

    timers.forces.start();
    /** density derivatives **/
    if (compute_forces) {
      dd.resize(total_nucleii(), 1); dd.zero();
      for (uint i = 0, ii = 0; i < total_functions_simple(); i++) {
        uint nuc = func2local_nuc(ii);
        uint inc_i = small_function_type(i);
        vec_type3 this_dd = vec_type3(0,0,0);
        for (uint k = 0; k < inc_i; k++, ii++) {
          scalar_type w = 0.0;
          for (uint j = 0; j < group_m; j++) {
            scalar_type Fj = function_values(j, point);
            w += rmm_input(j, ii) * Fj * (ii == j ? 2 : 1);
          }
          this_dd -= gradient_values(ii, point) * w;
        }
        dd(nuc) += this_dd;
      }
    }
    timers.forces.pause();

    timers.pot.start();

    timers.density.start();
    /** energy / potential **/
    scalar_type exc = 0, corr = 0, y2a = 0;
    if (lda)
      cpu_pot(partial_density, exc, corr, y2a);
    else {
      cpu_potg(partial_density, dxyz, dd1, dd2, exc, corr, y2a);
    }

    timers.pot.pause();

    if (compute_energy)
      energy += (partial_density * point_it->weight) * (exc + corr);
    timers.density.pause();


    /** forces **/
    timers.forces.start();
    if (compute_forces) {
      scalar_type factor = point_it->weight * y2a;
      for (uint i = 0; i < total_nucleii(); i++)
        forces(i) += dd(i) * factor;
    }
    timers.forces.pause();

    /** RMM **/
    timers.rmm.start();
    if (compute_rmm) {
      scalar_type factor = point_it->weight * y2a;
      HostMatrix<scalar_type>::blas_ssyr(LowerTriangle, factor, function_values, rmm_output, point);
    }
    timers.rmm.pause();
  }

  timers.forces.start();
  /* accumulate force results for this group */
  if (compute_forces) {
    FortranMatrix<double> fort_forces(fort_forces_ptr, fortran_vars.atoms, 3, fortran_vars.max_atoms); // TODO: mover esto a init.cpp
    for (uint i = 0; i < total_nucleii(); i++) {
      uint global_atom = local2global_nuc[i];
      vec_type3 this_force = forces(i);
      fort_forces(global_atom,0) += this_force.x();
      fort_forces(global_atom,1) += this_force.y();
      fort_forces(global_atom,2) += this_force.z();
    }
  }
  timers.forces.pause();

  timers.rmm.start();
  /* accumulate RMM results for this group */
  if (compute_rmm) {
    for (uint i = 0, ii = 0; i < total_functions_simple(); i++) {
      uint inc_i = small_function_type(i);
      for (uint k = 0; k < inc_i; k++, ii++) {
        uint big_i = local2global_func[i] + k;

        for (uint j = 0, jj = 0; j < total_functions_simple(); j++) {
          uint inc_j = small_function_type(j);
          for (uint l = 0; l < inc_j; l++, jj++) {
            uint big_j = local2global_func[j] + l;
            if (big_i > big_j) continue;

            uint big_index = (big_i * fortran_vars.m - (big_i * (big_i - 1)) / 2) + (big_j - big_i);
            fortran_vars.rmm_output(big_index) += rmm_output(ii, jj);
          }
        }
      }
    }
  }
  timers.rmm.pause();

  #if CPU_RECOMPUTE
  /* clear functions */
  function_values.deallocate();
  gradient_values.deallocate();
  hessian_values.deallocate();
  #endif
}