void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){ //std::cout << "Training" << std::endl; // c_width = (float*) malloc(sizeof(float)*network_size); // memset(c_width,0,sizeof(float)*network_size); DeviceMatrix<float> device_X(Input); //std::cout << "KMeans" << std::endl; clock_t initialTime = clock(); KMeans KM; KM.SetSeed(seed); dCenters = KM.Execute(device_X,network_size); cudaThreadSynchronize(); times[0] = (clock() - initialTime); //std::cout << "Adjust Widths" << std::endl; /*Adjust width using mean of distance to neighbours*/ initialTime = clock(); AdjustWidths(number_neighbours); cudaThreadSynchronize(); times[1] = (clock() - initialTime); /*Training weights and scaling factor*/ HostMatrix<float> TargetArr(Target.Rows(),NumClasses); memset(TargetArr.Pointer(),0,sizeof(float)*TargetArr.Elements()); for(int i = 0; i < Target.Rows(); i++){ TargetArr(i,((int)Target(i,0)-1)) = 1; } DeviceMatrix<float> d_Target(TargetArr); //std::cout << "Calculating Weights" << std::endl; initialTime = clock(); DeviceMatrix<float> device_activ_matrix(device_X.Rows(),dCenters.Rows(),ColumnMajor); KernelActivationMatrix(device_activ_matrix.Pointer(),device_X.Pointer(),dCenters.Pointer(),device_X.Columns(),dCenters.Columns(),device_activ_matrix.Columns(),device_activ_matrix.Rows(),scaling_factor,device_c_width.Pointer()); DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix); dWeights = DeviceMatrix<float>(d_Aplus.Rows(),d_Target.Columns()); d_Aplus.Multiply(d_Aplus,d_Target,dWeights); /*Return Weights and Centers*/ cudaThreadSynchronize(); times[2] = (clock() - initialTime); // cudaMemcpy(c_width,device_c_width.Pointer(),sizeof(float)*device_c_width.Length(),cudaMemcpyDeviceToHost); // this->Weights = HostMatrix<float>(dWeights); // this->Centers = HostMatrix<float>(dCenters); }
void writeHeader(HostMatrix<float> &Input, int number_classes, int seed){ cout << "\n=== Run information ===\n\n"; cout << std::left << setw(50) << "Random Seed" << std::left << setw(20) << seed << endl; cout << std::left << setw(50) << "Number of Basis Functions" << std::left << setw(20) << NETWORK_SIZE << endl; cout << std::left << setw(50) << "Number of Neighbours for Width Estimation" << std::left << setw(20) << RNEIGHBOURS << endl; cout << std::left << setw(50) << "Number of Folds" << std::left << setw(20) << KFOLDS << endl; cout << std::left << setw(50) << "Number of Attributes" << std::left << setw(20) << Input.Columns() << endl; cout << std::left << setw(50) << "Number of Classes" << std::left << setw(20) << number_classes << endl; cout << std::left << setw(50) << "Number of Instances" << std::left << setw(20) << Input.Rows() << endl; }
void OCLAcceleratorMatrixBCSR<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) { const HostMatrixBCSR<ValueType> *cast_mat; // copy only in the same format assert(this->get_mat_format() == src.get_mat_format()); // CPU to OCL copy if ((cast_mat = dynamic_cast<const HostMatrixBCSR<ValueType>*> (&src)) != NULL) { if (this->get_nnz() == 0) this->AllocateBCSR(src.get_nnz(), src.get_nrow(), src.get_ncol() ); assert((this->get_nnz() == src.get_nnz()) && (this->get_nrow() == src.get_nrow()) && (this->get_ncol() == src.get_ncol()) ); cast_mat->get_nnz(); FATAL_ERROR(__FILE__, __LINE__); } else { LOG_INFO("Error unsupported OCL matrix type"); this->info(); src.info(); FATAL_ERROR(__FILE__, __LINE__); } }
/*Calculate the root mean square error of outputs against targets*/ double rmse_error(HostMatrix<float> &Target,HostMatrix<float> &Output){ //check global error float sum = 0; int i; for(i = 0; i < Target.Rows(); i++){ sum = sum + pow(Target(i,0) - Output(i,0),2); } return sqrt(sum/Target.Rows()); }
void MICAcceleratorMatrixHYB<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) { const HostMatrixHYB<ValueType> *cast_mat; // copy only in the same format assert(this->get_mat_format() == src.get_mat_format()); // CPU to MIC copy if ((cast_mat = dynamic_cast<const HostMatrixHYB<ValueType>*> (&src)) != NULL) { if (this->get_nnz() == 0) this->AllocateHYB(cast_mat->get_ell_nnz(), cast_mat->get_coo_nnz(), cast_mat->get_ell_max_row(), cast_mat->get_nrow(), cast_mat->get_ncol()); assert((this->get_nnz() == src.get_nnz()) && (this->get_nrow() == src.get_nrow()) && (this->get_ncol() == src.get_ncol()) ); if (this->get_ell_nnz() > 0) { copy_to_mic(this->local_backend_.MIC_dev, cast_mat->mat_.ELL.val, this->mat_.ELL.val, this->get_ell_nnz()); copy_to_mic(this->local_backend_.MIC_dev, cast_mat->mat_.ELL.col, this->mat_.ELL.col, this->get_ell_nnz()); } if (this->get_coo_nnz() > 0) { copy_to_mic(this->local_backend_.MIC_dev, cast_mat->mat_.COO.row, this->mat_.COO.row, this->get_coo_nnz()); copy_to_mic(this->local_backend_.MIC_dev, cast_mat->mat_.COO.col, this->mat_.COO.col, this->get_coo_nnz()); copy_to_mic(this->local_backend_.MIC_dev, cast_mat->mat_.COO.val, this->mat_.COO.val, this->get_coo_nnz()); } } else { LOG_INFO("Error unsupported MIC matrix type"); this->info(); src.info(); FATAL_ERROR(__FILE__, __LINE__); } }
void MICAcceleratorMatrixDIA<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) { const HostMatrixDIA<ValueType> *cast_mat; // copy only in the same format assert(this->get_mat_format() == src.get_mat_format()); // CPU to MIC copy if ((cast_mat = dynamic_cast<const HostMatrixDIA<ValueType>*> (&src)) != NULL) { if (this->get_nnz() == 0) this->AllocateDIA(cast_mat->get_nnz(), cast_mat->get_nrow(), cast_mat->get_ncol(), cast_mat->get_ndiag()); assert((this->get_nnz() == src.get_nnz()) && (this->get_nrow() == src.get_nrow()) && (this->get_ncol() == src.get_ncol()) ); if (this->get_nnz() > 0) { copy_to_mic(cast_mat->mat_.val, this->mat_.val, this->get_nnz()); copy_to_mic(cast_mat->mat_.offset, this->mat_.offset, this->mat_.num_diag); /* // TODO for (int j=0; j<this->get_nnz(); ++j) this->mat_.val[j] = cast_mat->mat_.val[j]; for (int j=0; j<this->mat_.num_diag; ++j) this->mat_.offset[j] = cast_mat->mat_.offset[j]; */ } } else { LOG_INFO("Error unsupported MIC matrix type"); this->info(); src.info(); FATAL_ERROR(__FILE__, __LINE__); } }
void OCLAcceleratorMatrixCOO<ValueType>::CopyFromHost(const HostMatrix<ValueType> &src) { const HostMatrixCOO<ValueType> *cast_mat; // copy only in the same format assert(this->get_mat_format() == src.get_mat_format()); // CPU to OCL copy if ((cast_mat = dynamic_cast<const HostMatrixCOO<ValueType>*> (&src)) != NULL) { if (this->get_nnz() == 0) this->AllocateCOO(src.get_nnz(), src.get_nrow(), src.get_ncol() ); if (this->get_nnz() > 0) { assert((this->get_nnz() == src.get_nnz()) && (this->get_nrow() == src.get_nrow()) && (this->get_ncol() == src.get_ncol()) ); // Copy object from host to device memory ocl_host2dev<int>(this->get_nnz(), // size cast_mat->mat_.row, // src this->mat_.row, // dst OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue ); // Copy object from host to device memory ocl_host2dev<int>(this->get_nnz(), // size cast_mat->mat_.col, // src this->mat_.col, // dst OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue ); // Copy object from host to device memory ocl_host2dev<ValueType>(this->get_nnz(), // size cast_mat->mat_.val, // src this->mat_.val, // dst OCL_HANDLE(this->local_backend_.OCL_handle)->OCL_cmdQueue ); } } else { LOG_INFO("Error unsupported OCL matrix type"); this->info(); src.info(); FATAL_ERROR(__FILE__, __LINE__); } }
HostMatrix<T> operator*(const HostMatrix<T>& m) { HostMatrix<T> mult(m.width_,height_); for(int row=0; row<mult.height_; ++row) for(int col=0; col<mult.width_; ++col) { T v = 0.0; for(int k=0; k<width_; ++k) { v+=getData(row,k)*m.getData(k,col); } mult.setData(row,col,v); } return mult; }
/*Count number of miscalculated outputs against targets*/ int error_calc(HostMatrix<float> &Target,HostMatrix<float> &Output){ //check global error int i; int error = 0; for(i = 0; i < Target.Rows(); i++){ if(Target(i,0) - Output(i,0) != 0){ error += 1; } } return error; }
void PointGroup<scalar_type>::solve(Timers& timers, bool compute_rmm, bool lda, bool compute_forces, bool compute_energy, double& energy, double* fort_forces_ptr) { HostMatrix<scalar_type> rmm_output; uint group_m = total_functions(); if (compute_rmm) { rmm_output.resize(group_m, group_m); rmm_output.zero(); } #if CPU_RECOMPUTE /** Compute functions **/ timers.functions.start(); compute_functions(compute_forces, !lda); timers.functions.pause(); #endif // prepare rmm_input for this group timers.density.start(); HostMatrix<scalar_type> rmm_input(group_m, group_m); get_rmm_input(rmm_input); timers.density.pause(); HostMatrix<vec_type3> forces(total_nucleii(), 1); forces.zero(); HostMatrix<vec_type3> dd; /******** each point *******/ uint point = 0; for (list<Point>::const_iterator point_it = points.begin(); point_it != points.end(); ++point_it, ++point) { timers.density.start(); /** density **/ scalar_type partial_density = 0; vec_type3 dxyz(0,0,0); vec_type3 dd1(0,0,0); vec_type3 dd2(0,0,0); if (lda) { for (uint i = 0; i < group_m; i++) { float w = 0.0; float Fi = function_values(i, point); for (uint j = i; j < group_m; j++) { scalar_type Fj = function_values(j, point); w += rmm_input(j, i) * Fj; } partial_density += Fi * w; } } else { for (uint i = 0; i < group_m; i++) { float w = 0.0; vec_type3 w3(0,0,0); vec_type3 ww1(0,0,0); vec_type3 ww2(0,0,0); scalar_type Fi = function_values(i, point); vec_type3 Fgi(gradient_values(i, point)); vec_type3 Fhi1(hessian_values(2 * (i + 0) + 0, point)); vec_type3 Fhi2(hessian_values(2 * (i + 0) + 1, point)); for (uint j = 0; j <= i; j++) { scalar_type rmm = rmm_input(j,i); scalar_type Fj = function_values(j, point); w += Fj * rmm; vec_type3 Fgj(gradient_values(j, point)); w3 += Fgj * rmm; vec_type3 Fhj1(hessian_values(2 * (j + 0) + 0, point)); vec_type3 Fhj2(hessian_values(2 * (j + 0) + 1, point)); ww1 += Fhj1 * rmm; ww2 += Fhj2 * rmm; } partial_density += Fi * w; dxyz += Fgi * w + w3 * Fi; dd1 += Fgi * w3 * 2 + Fhi1 * w + ww1 * Fi; vec_type3 FgXXY(Fgi.x(), Fgi.x(), Fgi.y()); vec_type3 w3YZZ(w3.y(), w3.z(), w3.z()); vec_type3 FgiYZZ(Fgi.y(), Fgi.z(), Fgi.z()); vec_type3 w3XXY(w3.x(), w3.x(), w3.y()); dd2 += FgXXY * w3YZZ + FgiYZZ * w3XXY + Fhi2 * w + ww2 * Fi; } } timers.density.pause(); timers.forces.start(); /** density derivatives **/ if (compute_forces) { dd.resize(total_nucleii(), 1); dd.zero(); for (uint i = 0, ii = 0; i < total_functions_simple(); i++) { uint nuc = func2local_nuc(ii); uint inc_i = small_function_type(i); vec_type3 this_dd = vec_type3(0,0,0); for (uint k = 0; k < inc_i; k++, ii++) { scalar_type w = 0.0; for (uint j = 0; j < group_m; j++) { scalar_type Fj = function_values(j, point); w += rmm_input(j, ii) * Fj * (ii == j ? 2 : 1); } this_dd -= gradient_values(ii, point) * w; } dd(nuc) += this_dd; } } timers.forces.pause(); timers.pot.start(); timers.density.start(); /** energy / potential **/ scalar_type exc = 0, corr = 0, y2a = 0; if (lda) cpu_pot(partial_density, exc, corr, y2a); else { cpu_potg(partial_density, dxyz, dd1, dd2, exc, corr, y2a); } timers.pot.pause(); if (compute_energy) energy += (partial_density * point_it->weight) * (exc + corr); timers.density.pause(); /** forces **/ timers.forces.start(); if (compute_forces) { scalar_type factor = point_it->weight * y2a; for (uint i = 0; i < total_nucleii(); i++) forces(i) += dd(i) * factor; } timers.forces.pause(); /** RMM **/ timers.rmm.start(); if (compute_rmm) { scalar_type factor = point_it->weight * y2a; HostMatrix<scalar_type>::blas_ssyr(LowerTriangle, factor, function_values, rmm_output, point); } timers.rmm.pause(); } timers.forces.start(); /* accumulate force results for this group */ if (compute_forces) { FortranMatrix<double> fort_forces(fort_forces_ptr, fortran_vars.atoms, 3, fortran_vars.max_atoms); // TODO: mover esto a init.cpp for (uint i = 0; i < total_nucleii(); i++) { uint global_atom = local2global_nuc[i]; vec_type3 this_force = forces(i); fort_forces(global_atom,0) += this_force.x(); fort_forces(global_atom,1) += this_force.y(); fort_forces(global_atom,2) += this_force.z(); } } timers.forces.pause(); timers.rmm.start(); /* accumulate RMM results for this group */ if (compute_rmm) { for (uint i = 0, ii = 0; i < total_functions_simple(); i++) { uint inc_i = small_function_type(i); for (uint k = 0; k < inc_i; k++, ii++) { uint big_i = local2global_func[i] + k; for (uint j = 0, jj = 0; j < total_functions_simple(); j++) { uint inc_j = small_function_type(j); for (uint l = 0; l < inc_j; l++, jj++) { uint big_j = local2global_func[j] + l; if (big_i > big_j) continue; uint big_index = (big_i * fortran_vars.m - (big_i * (big_i - 1)) / 2) + (big_j - big_i); fortran_vars.rmm_output(big_index) += rmm_output(ii, jj); } } } } } timers.rmm.pause(); #if CPU_RECOMPUTE /* clear functions */ function_values.deallocate(); gradient_values.deallocate(); hessian_values.deallocate(); #endif }