/**
 * Trains the radial basis function network on the given samples.
 *
 * The work is done in three timed stages; the elapsed clock() ticks of each
 * stage are stored in times[0..2]:
 *   0. KMeans is run on the input to obtain network_size centers (dCenters).
 *   1. AdjustWidths(number_neighbours) adjusts the width of each center.
 *   2. The activation matrix is built on the device, its pseudoinverse is
 *      taken, and the result is multiplied by the one-hot encoded targets to
 *      obtain the output weights (dWeights).
 *
 * @param Input  Training samples; copied to the device as-is.
 *               NOTE(review): presumably one sample per row - confirm.
 * @param Target Class labels; only column 0 is read. Labels are treated as
 *               1-based class numbers in the range [1, NumClasses].
 *
 * Results are left in the members dCenters, dWeights and times.
 */
void RadialBasisFunction::Train(HostMatrix<float> &Input, HostMatrix<float> &Target){
    DeviceMatrix<float> device_X(Input);

    /* Stage 0: find the centers with k-means. */
    clock_t initialTime = clock();

    KMeans KM;
    KM.SetSeed(seed);
    dCenters = KM.Execute(device_X,network_size);

    // Wait for the device to finish so that the measured time covers the GPU work.
    // cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
    cudaDeviceSynchronize();
    times[0] = (clock() - initialTime);

    /* Stage 1: adjust width using mean of distance to neighbours. */
    initialTime = clock();

    AdjustWidths(number_neighbours);

    cudaDeviceSynchronize();
    times[1] = (clock() - initialTime);

    /* Training weights and scaling factor. */

    // One-hot encode the labels: row i gets a 1 in column (label - 1).
    HostMatrix<float> TargetArr(Target.Rows(),NumClasses);
    memset(TargetArr.Pointer(),0,sizeof(float)*TargetArr.Elements());

    for(int i = 0; i < Target.Rows(); i++){
        int classIndex = ((int)Target(i,0)) - 1;

        // A label outside [1, NumClasses] would index outside TargetArr;
        // such a sample keeps an all-zero target row instead.
        if(classIndex >= 0 && classIndex < NumClasses){
            TargetArr(i,classIndex) = 1;
        }
    }

    DeviceMatrix<float> d_Target(TargetArr);

    /* Stage 2: compute the output weights. */
    initialTime = clock();

    // One row per sample, one column per center.
    DeviceMatrix<float> device_activ_matrix(device_X.Rows(),dCenters.Rows(),ColumnMajor);

    KernelActivationMatrix(device_activ_matrix.Pointer(),
                           device_X.Pointer(),
                           dCenters.Pointer(),
                           device_X.Columns(),
                           dCenters.Columns(),
                           device_activ_matrix.Columns(),
                           device_activ_matrix.Rows(),
                           scaling_factor,
                           device_c_width.Pointer());

    // Weights = pseudoinverse(activation matrix) * one-hot targets.
    DeviceMatrix<float> d_Aplus = UTILS::pseudoinverse(device_activ_matrix);

    dWeights = DeviceMatrix<float>(d_Aplus.Rows(),d_Target.Columns());
    d_Aplus.Multiply(d_Aplus,d_Target,dWeights);

    cudaDeviceSynchronize();
    times[2] = (clock() - initialTime);

    // Weights, centers and widths stay on the device (dWeights, dCenters,
    // device_c_width); no host copies are made here.
}