#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "nn.h"   /* assumed project header declaring NN, nnff, nnbp, nnapplygrads, nneval, etc. */

void nntrain(NN* nn, int numdata, const double** train_x, const double** train_y, int numepochs, int batchsize)
{
    int numbatches = numdata / batchsize;   /* samples beyond the last full batch are skipped */
    int iter, b, i, k;
    for(iter = 0; iter < numepochs; iter++){


        for(b=0; b< numbatches; b++){
            printf("Training NN %d / %d\tbatch:%d / %d\n", iter+1, numepochs, b+1, numbatches);
            

            /* zero the accumulated weight gradients (adw) before this mini-batch */
            set2DarrayZero( nn->layer[0].adw, nn->layer[0].units, nn->inputUnits );
            for(k=1; k< nn->n - 1; k++)
                set2DarrayZero( nn->layer[k].adw, nn->layer[k].units, nn->layer[k-1].units );
            
            for(i=0; i<batchsize; i++){
                nnff(nn, b*batchsize + i, train_x);            /* forward pass */
                nnbp(nn, b*batchsize + i, train_x, train_y);   /* backprop: accumulate gradients */
            }

            nnapplygrads(nn, batchsize);   /* update weights from the accumulated gradients */

            // double errorRate = nneval(nn, batchsize, (const double**)&train_x[b*batchsize], (const double**)&train_y[b*batchsize]);
            // printf("error rate=%f\n", errorRate);
            

        }
        
        double errorRate = nneval(nn, numdata, (const double**)train_x, (const double**)train_y);
        printf("Full-batch train error rate=%f\n", errorRate);

        nn->learningRate *= nn->scaling_learningRate;   /* anneal the learning rate each epoch */
    }

}
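
A minimal sketch of how nntrain might be driven, using the MNIST loaders that appear in main() below; createNN and the hyperparameter values are hypothetical stand-ins for however the project actually constructs the network:

    /* hypothetical driver for nntrain; createNN and the layer layout
       are assumptions, not part of the code above */
    int train_num;
    double **train_x = mnist_load_data("train-images-idx3-ubyte", &train_num);
    double **train_y = mnist_load_labels("train-labels-idx1-ubyte", &train_num);

    int layers[] = {784, 100, 10};            /* input, hidden, output units */
    NN *nn = createNN(3, layers);             /* hypothetical constructor */
    nn->learningRate = 0.1;
    nn->scaling_learningRate = 0.99;

    nntrain(nn, train_num, (const double**)train_x, (const double**)train_y, 5, 100);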
int main(int argc, char* argv[]){

    int c, i;
    char *inputImageFile = NULL;
    char *inputLabelFile = NULL;
    char *inputModel = "noname.model";
    char *outputResult = "result.txt";

    const char *usage = "usage: \n-i input image file\n-m inputModel\n-o outputResult\n"
                        "-y (optional) input label file\n\tif provided, accuracy is calculated immediately\n";

    while( (c = getopt(argc, argv, "i:y:m:o:h")) != -1 ){
        switch(c)
        {
            case 'h':
                printf("%s", usage);
                exit(0);
            case 'i':
                inputImageFile = optarg;
                break;
            case 'y':
                inputLabelFile = optarg;
                break;
            case 'm':
                inputModel = optarg;
                break;
            case 'o':
                outputResult = optarg;
                break;
            case '?':
                printf("Illegal option\n");
                printf("%s", usage);
                exit(1);
            default:
                printf("%s", usage);
                exit(1);
        }
    }
    

    if(inputImageFile == NULL){
        printf("missing required -i input image file\n");
        exit(1);
    }

    int test_num;
    double **test_x = mnist_load_data(inputImageFile, &test_num);

    int dim = 28 * 28;  // expected data dimension: 28x28 MNIST pixels

    // normalization parameters
    double* mean = (double*) malloc( dim * sizeof(double));  
    double* sigma = (double*) malloc( dim * sizeof(double)); 

    // load the trained model together with its normalization parameters
    NN* mm = importModel(mean, sigma, inputModel);
    normalize_zscore_apply(test_x, test_num, mean, sigma, dim);

    if(inputLabelFile != NULL){
        // labels provided: report accuracy on the test set immediately
        double **test_y = mnist_load_labels(inputLabelFile, &test_num);
        double errorRate = nneval(mm, test_num, (const double**)test_x, (const double**)test_y);
        printf("Error rate=%f\n", errorRate);
    }


    // write one predicted label per line
    FILE* fp = fopen(outputResult, "w");
    if(fp == NULL){
        printf("cannot open output file %s\n", outputResult);
        exit(1);
    }
    for(i=0; i< test_num; i++){
        fprintf(fp, "%d\n", nnpredict(mm, i, (const double**)test_x) );
    }
    fclose(fp);

    return 0;
}
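
With the options above, a typical invocation might look like this (binary and file names are illustrative):

    ./nnpredict -i t10k-images-idx3-ubyte -y t10k-labels-idx1-ubyte -m mnist.model -o result.txt

This writes one predicted digit per line to result.txt and, because -y is given, prints the error rate first.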
Example #3
  // trains a neural net; pFBNN, when non-null, is a shared model used for parallel training
  void FBNN::train(const FMatrix & train_x, const FMatrix & train_y, const Opts & opts, const FMatrix & valid_x, const FMatrix & valid_y, const FBNN_ptr pFBNN)
  {
      int ibatchNum = train_x.rows() / opts.batchsize + (train_x.rows() % opts.batchsize != 0);  // ceiling division
      FMatrix L = zeros(opts.numpochs * ibatchNum, 1);  // per-batch loss record
      m_oLp = std::make_shared<FMatrix>(L);
      Loss loss;
      for(int i = 0; i < opts.numpochs; ++i)
      {
	  std::cout << "start numpochs " << i << std::endl;	  
	  int elapsedTime = count_elapse_second([&train_x,&train_y,&L,&opts,i,pFBNN,ibatchNum,this]{
	    std::vector<int> iRandVec;
            randperm(train_x.rows(),iRandVec);
            std::cout << "start batch: ";
            for(int j = 0; j < ibatchNum; ++j)
            {
                std::cout << " " << j;
                if(pFBNN)  // pull: copy the shared model's weights under a reader lock
                {
                    boost::shared_lock<RWMutex> rlock(pFBNN->W_RWMutex);
                    set_m_oWs(pFBNN->get_m_oWs());
                    if(m_fMomentum > 0)
                        set_m_oVWs(pFBNN->get_m_oVWs());
                    rlock.unlock();
                }
                int curBatchSize = opts.batchsize;
                if(j == ibatchNum - 1 && train_x.rows() % opts.batchsize != 0)
                    curBatchSize = train_x.rows() % opts.batchsize;  // the last batch may be smaller
                FMatrix batch_x(curBatchSize,train_x.columns());
                for(int r = 0; r < curBatchSize; ++r)  // gather shuffled rows into the mini-batch
                    row(batch_x,r) = row(train_x,iRandVec[j * opts.batchsize + r]);

                //Add noise to input (for use in denoising autoencoder)
                if(m_fInputZeroMaskedFraction != 0)
                    batch_x = bitWiseMul(batch_x,(rand(curBatchSize,train_x.columns())>m_fInputZeroMaskedFraction));

                FMatrix batch_y(curBatchSize,train_y.columns());
                for(int r = 0; r < curBatchSize; ++r)  // matching label rows in the same shuffled order
                    row(batch_y,r) = row(train_y,iRandVec[j * opts.batchsize + r]);

                L(i*ibatchNum+j,0) = nnff(batch_x,batch_y);  // forward pass; returns the batch loss
                nnbp();                                      // backpropagate gradients
                nnapplygrads();                              // update local weights
                if(pFBNN)  // push: hand the local gradients to the shared model under a writer lock
                {
                    boost::unique_lock<RWMutex> wlock(pFBNN->W_RWMutex);
                    pFBNN->set_m_odWs(m_odWs);
                    pFBNN->nnapplygrads();
                    wlock.unlock();
                }
            }
            std::cout << std::endl;
          });
          std::cout << "elapsed time: " << elapsedTime << "s" << std::endl;
          // compute the epoch losses with nneval
          if(valid_x.rows() == 0 || valid_y.rows() == 0){
            nneval(loss, train_x, train_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back() << std::endl;
          }
          else{
            nneval(loss, train_x, train_y, valid_x, valid_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back() << " , val mse = " << loss.valid_error.back() << std::endl;
          }
          std::cout << "epoch " << i+1 << " / " << opts.numpochs << " took " << elapsedTime << " seconds." << std::endl;
          std::cout << "Mini-batch mean squared error on training set is " << columnMean(submatrix(L,i*ibatchNum,0UL,ibatchNum,L.columns())) << std::endl;
          m_iLearningRate *= m_fScalingLearningRate;  // anneal the learning rate each epoch
      }
  }
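
To make the signature concrete, here is a hypothetical single-worker call (no shared model, so no pull/push). Only opts.numpochs and opts.batchsize are read by train; the loader, the FBNN constructor arguments, and the default-constructed FBNN_ptr standing in for "no shared model" are all assumptions:

      // hypothetical driver for FBNN::train
      Opts opts;
      opts.numpochs = 10;
      opts.batchsize = 100;

      FMatrix train_x = loadMatrix("train_x.dat");   // assumed loader
      FMatrix train_y = loadMatrix("train_y.dat");
      FMatrix valid_x, valid_y;                      // zero rows => validation is skipped

      FBNN net(/* layer sizes etc.; constructor signature assumed */);
      net.train(train_x, train_y, opts, valid_x, valid_y, FBNN_ptr());  // empty pFBNN: train standalone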
Example #4
 // delegates to the full overload, passing empty matrices for the validation set
 void ff::FBNN::nneval(Loss & loss, const FMatrix& train_x, const FMatrix& train_y)
 {
   FMatrix emptyM;
   nneval(loss,train_x,train_y,emptyM,emptyM);
 }
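
This two-argument overload is the sentinel convention that train relies on: a default-constructed FMatrix has zero rows, which is exactly what the valid_x.rows() == 0 test in train treats as "no validation set". So, for a hypothetical FBNN instance net:

      Loss loss;
      net.nneval(loss, train_x, train_y);      // no validation set
      // is equivalent to:
      FMatrix empty;
      net.nneval(loss, train_x, train_y, empty, empty);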