#include <stdio.h>  /* for printf */

/* Trains the network for numepochs passes over numdata samples,
 * processing the data in mini-batches of batchsize. */
void nntrain(NN* nn, int numdata, const double** train_x, const double** train_y, int numepochs, int batchsize)
{
    int numbatches = numdata / batchsize;  /* samples beyond the last full batch are dropped */
    int iter, b, i, k;
    for(iter = 0; iter < numepochs; iter++){

        for(b = 0; b < numbatches; b++){
            printf("Training NN %d / %d\tbatch:%d / %d\n", iter+1, numepochs, b+1, numbatches);

            /* Zero the accumulated weight gradients before each mini-batch. */
            set2DarrayZero( nn->layer[0].adw, nn->layer[0].units, nn->inputUnits );
            for(k = 1; k < nn->n - 1; k++)
                set2DarrayZero( nn->layer[k].adw, nn->layer[k].units, nn->layer[k-1].units );
            
            /* Accumulate gradients over the mini-batch: forward pass, then backpropagation. */
            for(i = 0; i < batchsize; i++){
                nnff(nn, b*batchsize + i, train_x);
                nnbp(nn, b*batchsize + i, train_x, train_y);
            }

            /* Apply the averaged mini-batch gradients to the weights. */
            nnapplygrads(nn, batchsize);

            // Optional per-batch evaluation:
            // double errorRate = nneval(nn, batchsize, (const double**)&train_x[b*batchsize], (const double**)&train_y[b*batchsize]);
            // printf("error rate=%f\n", errorRate);
        }
        
        /* Evaluate on the full training set once per epoch. */
        double errorRate = nneval(nn, numdata, (const double**)train_x, (const double**)train_y);
        printf("Full-batch train error rate=%f\n", errorRate);

        /* Decay the learning rate after each epoch. */
        nn->learningRate = nn->learningRate * nn->scaling_learningRate;
    }

}
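The helpers this function relies on (`set2DarrayZero`, `nnff`, `nnbp`, `nnapplygrads`, `nneval`) are not part of the snippet. As a rough illustration, here is a minimal sketch of what `set2DarrayZero` might look like, assuming each `adw` gradient matrix is stored as an array of row pointers; the actual `NN` layout is not shown, so treat this as a hypothetical helper, not the project's implementation:

#include <string.h>

/* Hypothetical sketch: zero a rows-x-cols matrix stored as an array of row pointers. */
void set2DarrayZero(double** a, int rows, int cols)
{
    int r;
    for(r = 0; r < rows; r++)
        memset(a[r], 0, cols * sizeof(double));
}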
Example #2
  // Trains a neural net. When pFBNN is non-null, weights are pulled from and
  // gradients pushed to that shared network around every mini-batch.
  void FBNN::train(const FMatrix & train_x, const FMatrix & train_y, const Opts & opts, const FMatrix & valid_x, const FMatrix & valid_y, const FBNN_ptr pFBNN)
  {
      // Number of mini-batches per epoch, rounded up to include a final partial batch.
      int ibatchNum = train_x.rows() / opts.batchsize + (train_x.rows() % opts.batchsize != 0);
      // L records the loss of every mini-batch across all epochs.
      FMatrix L = zeros(opts.numpochs * ibatchNum, 1);
      m_oLp = std::make_shared<FMatrix>(L);
      Loss loss;
      for(int i = 0; i < opts.numpochs; ++i)
      {
          std::cout << "start epoch " << i << std::endl;
          int elapsedTime = count_elapse_second([&train_x,&train_y,&L,&opts,i,pFBNN,ibatchNum,this]{
            // Reshuffle the sample indices once per epoch.
            std::vector<int> iRandVec;
            randperm(train_x.rows(),iRandVec);
            std::cout << "start batch: ";
            for(int j = 0; j < ibatchNum; ++j)
            {
                std::cout << " " << j;
                if(pFBNN)  // pull: copy the shared network's weights under a reader lock
                {
                    boost::shared_lock<RWMutex> rlock(pFBNN->W_RWMutex);
                    set_m_oWs(pFBNN->get_m_oWs());
                    if(m_fMomentum > 0)
                        set_m_oVWs(pFBNN->get_m_oVWs());
                    rlock.unlock();
                }
                // The last batch may be smaller when the data size is not a multiple of batchsize.
                int curBatchSize = opts.batchsize;
                if(j == ibatchNum - 1 && train_x.rows() % opts.batchsize != 0)
                    curBatchSize = train_x.rows() % opts.batchsize;
                FMatrix batch_x(curBatchSize,train_x.columns());
                for(int r = 0; r < curBatchSize; ++r)  // gather the shuffled rows for this batch
                    row(batch_x,r) = row(train_x,iRandVec[j * opts.batchsize + r]);

                //Add noise to input (for use in denoising autoencoder)
                if(m_fInputZeroMaskedFraction != 0)
                    batch_x = bitWiseMul(batch_x,(rand(curBatchSize,train_x.columns())>m_fInputZeroMaskedFraction));

                FMatrix batch_y(curBatchSize,train_y.columns());
                for(int r = 0; r < curBatchSize; ++r)  // same shuffled rows for the targets
                    row(batch_y,r) = row(train_y,iRandVec[j * opts.batchsize + r]);

                // Forward pass returns the batch loss; backpropagation and the weight update follow.
                L(i*ibatchNum+j,0) = nnff(batch_x,batch_y);
                nnbp();
                nnapplygrads();
                if(pFBNN)  // push: hand the gradients to the shared network under a writer lock
                {
                    boost::unique_lock<RWMutex> wlock(pFBNN->W_RWMutex);
                    pFBNN->set_m_odWs(m_odWs);
                    pFBNN->nnapplygrads();
                    wlock.unlock();
                }
            }
            std::cout << std::endl;
          });
          std::cout << "elapsed time: " << elapsedTime << "s" << std::endl;
          // Compute the epoch loss via nneval, with or without a validation set.
          if(valid_x.rows() == 0 || valid_y.rows() == 0){
            nneval(loss, train_x, train_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back() << std::endl;
          }
          else{
            nneval(loss, train_x, train_y, valid_x, valid_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back() << " , val mse = " << loss.valid_error.back() << std::endl;
          }
          std::cout << "epoch " << i+1 << " / " << opts.numpochs << " took " << elapsedTime << " seconds." << std::endl;
          std::cout << "Mini-batch mean squared error on training set is " << columnMean(submatrix(L,i*ibatchNum,0UL,ibatchNum,L.columns())) << std::endl;
          m_iLearningRate *= m_fScalingLearningRate;  // decay the learning rate each epoch
      }

  }
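The per-epoch shuffle relies on `randperm`, which is not shown here. A minimal sketch, assuming it fills the vector with a random permutation of 0..n-1 (the project's actual implementation may differ):

#include <algorithm>
#include <numeric>
#include <random>
#include <vector>

// Hypothetical sketch: fill vec with a random permutation of 0 .. n-1.
void randperm(int n, std::vector<int>& vec)
{
    vec.resize(n);
    std::iota(vec.begin(), vec.end(), 0);   // 0, 1, ..., n-1
    static std::mt19937 gen(std::random_device{}());
    std::shuffle(vec.begin(), vec.end(), gen);
}

With this shape, `iRandVec[j * opts.batchsize + r]` in the training loop indexes a fresh random ordering of the training rows each epoch, which is what makes the mini-batches differ from epoch to epoch.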