/* Trains the network with mini-batch gradient descent for numepochs epochs.
 * train_x / train_y hold one sample per row; samples beyond
 * (numdata / batchsize) * batchsize are skipped each epoch. */
void nntrain(NN* nn, int numdata, const double** train_x, const double** train_y, int numepochs, int batchsize)
{
    const int batches = numdata / batchsize;
    int epoch, batch, sample, layer;

    for (epoch = 0; epoch < numepochs; epoch++) {
        for (batch = 0; batch < batches; batch++) {
            printf("Training NN %d / %d\tbatch:%d / %d\n", epoch + 1, numepochs, batch + 1, batches);

            /* reset the accumulated weight gradients of every weight layer */
            set2DarrayZero(nn->layer[0].adw, nn->layer[0].units, nn->inputUnits);
            for (layer = 1; layer < nn->n - 1; layer++)
                set2DarrayZero(nn->layer[layer].adw, nn->layer[layer].units, nn->layer[layer - 1].units);

            /* accumulate gradients over the mini-batch, then apply them */
            for (sample = 0; sample < batchsize; sample++) {
                const int idx = batch * batchsize + sample;
                nnff(nn, idx, train_x);
                nnbp(nn, idx, train_x, train_y);
            }
            nnapplygrads(nn, batchsize);
        }

        /* report full-batch training error once per epoch */
        {
            double errorRate = nneval(nn, numdata, (const double**)train_x, (const double**)train_y);
            printf("Full-batch train error rate=%f\n", errorRate);
        }

        /* decay the learning rate for the next epoch */
        nn->learningRate = nn->learningRate * nn->scaling_learningRate;
    }
}
/* Greedily trains a stack of autoencoders.  AE 0 is trained on train_x;
 * each subsequent AE is trained on the hidden-layer activations of the
 * previous one (computed for every sample via nnff).
 * saes      - array of numsaes autoencoder networks
 * train_x   - numdata input rows; never freed here (caller owns it) */
void saetrain(NN** saes, int numsaes, int numdata, const double** train_x, int numepochs, int batchsize)
{
    int i, j;
    double **train_data = (double**)train_x;   /* current AE's input; starts as caller data */
    double **new_train_data = NULL;

    for(i=0;i<numsaes; i++){
        printf("Training AE %d / %d\n", i+1, numsaes);

        if( i==0) {
            nntrain(saes[i], numdata, (const double**)train_data, (const double**)train_data, numepochs, batchsize);
        }else{
            /* generate data for this SAE: previous AE's hidden activations */
            new_train_data = create2Darray( numdata, saes[i-1]->layer[0].units);
            for(j=0; j<numdata; j++){
                nnff(saes[i-1], j, (const double**)train_data) ;
                /* BUG FIX: memcpy counts bytes, not doubles — the old code copied
                 * only `units` bytes, i.e. 1/8 of the activation vector */
                memcpy( new_train_data[j], saes[i-1]->layer[0].a,
                        saes[i-1]->layer[0].units * sizeof(double) );
            }

            nntrain(saes[i], numdata, (const double**)new_train_data, (const double**)new_train_data, numepochs, batchsize);

            /* free the dataset generated in the previous round; never the
             * caller-owned train_x (which train_data aliases while i == 1) */
            if( i > 1) free2Darray( train_data, numdata);
            train_data = new_train_data;
        }
    }

    /* BUG FIX: release the last generated dataset after the loop instead of
     * freeing it inside the loop and then assigning the dangling pointer */
    if (numsaes > 1 && new_train_data != NULL)
        free2Darray(new_train_data, numdata);
}
Example #3
0
  void ff::FBNN::nnpredict(const FMatrix& x, const FMatrix& y, FColumn& labels)
  {
//     std::cout << "start nnpredict" << std::endl;
    m_fTesting = true;
    nnff(x,zeros(x.rows(),m_oArch[m_iN - 1]));
    m_fTesting = false;
    labels = rowMaxIndexes(*m_oAs[m_iN - 1]);
//     std::cout << "end nnpredict" << std::endl;
  }
Example #4
0
  // Evaluates network performance: appends the training loss (and, when a
  // validation set is supplied, the validation loss) to `loss`; for softmax
  // outputs it additionally records misclassification fractions via nntest.
  void ff::FBNN::nneval(Loss & loss, const FMatrix& train_x, const FMatrix& train_y, const FMatrix& valid_x, const FMatrix& valid_y)
  {
    const bool hasValidation = valid_x.rows() != 0 && valid_y.rows() != 0;

    m_fTesting = true;
    loss.train_error.push_back(nnff(train_x, train_y));     // training performance
    if (hasValidation)
      loss.valid_error.push_back(nnff(valid_x, valid_y));   // validation performance
    m_fTesting = false;

    // misclassification rate only makes sense for a softmax output layer
    if (m_strOutput == "softmax")
    {
      loss.train_error_fraction.push_back(nntest(train_x, train_y));
      if (hasValidation)
        loss.valid_error_fraction.push_back(nntest(valid_x, valid_y));
    }
  }
Example #5
0
File: cnn.c  Project: GilgameshD/Vivado
// inputData is the image data, indexed inputData[r][c] (r rows, c columns),
// consistent with the layout of the weight kernels below.
// Forward propagation through the whole CNN (C1 conv -> S2 pool -> C3 conv
// -> S4 pool -> O5 fully connected).  inputData is the input image, indexed
// inputData[r][c] (r rows, c columns), matching the kernel layout.
void cnnff(CNN* cnn,float** inputData)
{
	int i, j, r, c;

	// ---- C1: first convolution layer ----
	nSize mapSize = {cnn->C1.mapSize, cnn->C1.mapSize};
	nSize inSize = {cnn->C1.inputWidth, cnn->C1.inputHeight};
	nSize outSize = {cnn->S2.inputWidth, cnn->S2.inputHeight};

	// Scratch buffer holding 6 kernels flattened to 25-element vectors;
	// reused by both convolution layers and released after the C3 loop.
	float **wholeKernel = (float**)malloc(6 * sizeof(float*));
	for (i = 0; i < 6; i++)
		wholeKernel[i] = (float*)malloc(25 * sizeof(float));

	int cov1layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
	for(j = 0;j < (cnn->C1.inChannels);j++)
	{
		int m, n, k;
		int l;
		// Flatten the 6 output-channel kernels, rotated 180 degrees
		// (index [4-k][4-n]) so the helper performs true convolution.
		for (m = 0; m < 6; m++)
		{
			l = 0;
			for (k = 0; k < 5; k++)
				for (n = 0; n < 5; n++)
					wholeKernel[m][l++] = cnn->C1.mapData[j][m][4-k][4-n];
		}
		float** mapout = cov_layer1_6(wholeKernel, mapSize, inputData, inSize, valid);
		// Accumulate the 24x24 convolution results into the pre-activation maps.
		for (i = 0; i < 6; i++)
			for (m = 0; m < 24; m++)
				for (n = 0; n < 24; n++)
					cnn->C1.v[i][m][n] += mapout[i][24*m+n];
		// BUG FIX: mapout was leaked on every iteration here; free it the
		// same way the C3 loop below does.
		for (i = 0; i < 6; i++)
			free(mapout[i]);
		free(mapout);
	}

	// Sigmoid activation with per-channel bias produces C1's output maps.
	for(i = 0;i < (cnn->C1.outChannels);i++)
		for(r = 0;r < outSize.r;r++)
			for(c = 0;c < outSize.c;c++)
				cnn->C1.y[i][r][c] = activation_Sigma(cnn->C1.v[i][r][c], cnn->C1.basicData[i]);

	int cov1layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
	// BUG FIX: this message said "cov2" although the timer covers the first conv layer.
	xil_printf("%d cycles spent on cov1\n", cov1layerend - cov1layerstart);

	// ---- S2: subsampling (average-pooling) layer ----
	outSize.c = cnn->C3.inputWidth;
	outSize.r = cnn->C3.inputHeight;
	inSize.c = cnn->S2.inputWidth;
	inSize.r = cnn->S2.inputHeight;
	for(i = 0;i < (cnn->S2.outChannels);i++)
	{
		if(cnn->S2.poolType == AvePool)
			avgPooling(cnn->S2.y[i], outSize, cnn->C1.y[i], inSize, cnn->S2.mapSize);
	}

	// ---- C3: second convolution layer (6 in-channels -> 12 out-channels),
	// processed as two batches of 6 kernels because the helper handles 6 at a time ----
	outSize.c = cnn->S4.inputWidth;
	outSize.r = cnn->S4.inputHeight;
	inSize.c = cnn->C3.inputWidth;
	inSize.r = cnn->C3.inputHeight;
	mapSize.c = cnn->C3.mapSize;
	mapSize.r = cnn->C3.mapSize;

	int cov2layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
	for(j = 0;j < (cnn->C3.inChannels);j++)
	{
		int m, n, k;
		int l;

		// First half: flattened, 180-degree-rotated kernels for output channels 0..5.
		for (m = 0; m < 6; m++)
		{
			l = 0;
			for (k = 0; k < 5; k++)
				for (n = 0; n < 5; n++)
					wholeKernel[m][l++] = cnn->C3.mapData[j][m][4-k][4-n];
		}
		float** mapout = cov_layer3_6(wholeKernel, mapSize, cnn->S2.y[j], inSize, valid);
		// Accumulate the 8x8 results into the pre-activation maps.
		for (i = 0; i < 6; i++)
			for (m = 0; m < 8; m++)
				for (n = 0; n < 8; n++)
					cnn->C3.v[i][m][n] += mapout[i][8*m+n];
		for(i = 0; i < 6;i++)
			free(mapout[i]);
		free(mapout);

		// Second half: kernels for output channels 6..11 (note the m+6 offset).
		for (m = 0; m < 6; m++)
		{
			l = 0;
			for (k = 0; k < 5; k++)
				for (n = 0; n < 5; n++)
					wholeKernel[m][l++] = cnn->C3.mapData[j][m+6][4-k][4-n];
		}
		mapout = cov_layer3_6(wholeKernel, mapSize, cnn->S2.y[j], inSize, valid);
		// Explicit start index (was `for(; i < 12; ...)` relying on i==6 left
		// over from the free loop above — same behavior, less fragile).
		for (i = 6; i < 12; i++)
			for (m = 0; m < 8; m++)
				for (n = 0; n < 8; n++)
					cnn->C3.v[i][m][n] += mapout[i-6][8*m+n];
		for(i = 0; i < 6;i++)
			free(mapout[i]);
		free(mapout);
	}
	for (i = 0; i < 6; i++)
		free(wholeKernel[i]);
	free(wholeKernel);

	for (i=0; i < cnn->C3.outChannels; i++)
		for(r=0;r<outSize.r;r++)
			for(c=0;c<outSize.c;c++)
				cnn->C3.y[i][r][c]=activation_Sigma(cnn->C3.v[i][r][c],cnn->C3.basicData[i]);

	int cov2layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
	xil_printf("%d cycles spent on cov2\n", cov2layerend - cov2layerstart);

	// ---- S4: second pooling layer ----
	inSize.c=cnn->S4.inputWidth;
	inSize.r=cnn->S4.inputHeight;
	outSize.c=inSize.c/cnn->S4.mapSize;
	outSize.r=inSize.r/cnn->S4.mapSize;
	for(i=0;i<(cnn->S4.outChannels);i++)
	{
		if(cnn->S4.poolType == AvePool)
			avgPooling(cnn->S4.y[i],outSize,cnn->C3.y[i],inSize,cnn->S4.mapSize);
	}

	// ---- O5: fully connected output layer ----
	// Flatten the multi-channel S4 output into one contiguous vector.
	float O5inData[192];
	for(i = 0;i < (cnn->S4.outChannels);i++)
		for(r = 0;r < outSize.r;r++)
			for(c = 0;c < outSize.c;c++)
				O5inData[i*outSize.r*outSize.c+r*outSize.c+c]=cnn->S4.y[i][r][c];

	nSize nnSize = {cnn->O5.inputNum, cnn->O5.outputNum};

	nnff(cnn->O5.v, O5inData, cnn->O5.wData, cnn->O5.basicData, nnSize);

	// Per-class score: sigmoid with per-unit bias.
	for(i = 0;i < cnn->O5.outputNum;i++)
		cnn->O5.y[i] = activation_Sigma(cnn->O5.v[i],cnn->O5.basicData[i]);
}
Example #6
0
  /// Trains the network with mini-batch SGD for opts.numpochs epochs.
  /// @param train_x / train_y  training samples and targets, one row per sample
  /// @param opts               provides batchsize and numpochs
  /// @param valid_x / valid_y  optional validation set; when either is empty,
  ///                           nneval is called without validation data
  /// @param pFBNN              optional shared "parameter-server" network: when
  ///                           set, weights (and velocities, if momentum is on)
  ///                           are pulled under a shared lock before each batch,
  ///                           and this worker's deltas are pushed back and
  ///                           applied under a unique lock after each batch.
  /// Per-batch losses are stored in L (also published via m_oLp); the learning
  /// rate is decayed by m_fScalingLearningRate after every epoch.
  void FBNN::train(const FMatrix & train_x, const FMatrix & train_y, const Opts & opts, const FMatrix & valid_x, const FMatrix & valid_y, const FBNN_ptr pFBNN)
  {
      // ceil(rows / batchsize): a final partial batch is included when rows
      // is not a multiple of batchsize
      int ibatchNum = train_x.rows() / opts.batchsize + (train_x.rows() % opts.batchsize != 0);
      FMatrix L = zeros(opts.numpochs * ibatchNum, 1);
      m_oLp = std::make_shared<FMatrix>(L);
      Loss loss;
//       std::cout << "numpochs = " << opts.numpochs << std::endl;
      for(int i = 0; i < opts.numpochs; ++i)
      {
	  std::cout << "start numpochs " << i << std::endl;	  
	  // time one full epoch; the lambda captures L by reference so the
	  // per-batch losses written inside persist after it returns
	  int elapsedTime = count_elapse_second([&train_x,&train_y,&L,&opts,i,pFBNN,ibatchNum,this]{
	    std::vector<int> iRandVec;
            // fresh random permutation of sample indices each epoch
            randperm(train_x.rows(),iRandVec);
            std::cout << "start batch: ";
            for(int j = 0; j < ibatchNum; ++j)
            {
                std::cout << " " << j;
                if(pFBNN)//pull
                {
// 		    TMutex::scoped_lock lock;
// 		    lock.acquire(pFBNN->W_RWMutex,false);
// 		    lock.release();//reader lock tbb
		    // reader lock: copy current weights (and velocities when
		    // momentum is enabled) from the shared network
		    boost::shared_lock<RWMutex> rlock(pFBNN->W_RWMutex);		    
                    set_m_oWs(pFBNN->get_m_oWs());
                    if(m_fMomentum > 0)
                        set_m_oVWs(pFBNN->get_m_oVWs());
		    rlock.unlock();
                }
                // last batch may be smaller when rows % batchsize != 0
                int curBatchSize = opts.batchsize;
                if(j == ibatchNum - 1 && train_x.rows() % opts.batchsize != 0)
                    curBatchSize = train_x.rows() % opts.batchsize;
                FMatrix batch_x(curBatchSize,train_x.columns());
                for(int r = 0; r < curBatchSize; ++r)//randperm()
                    row(batch_x,r) = row(train_x,iRandVec[j * opts.batchsize + r]);

                //Add noise to input (for use in denoising autoencoder)
                if(m_fInputZeroMaskedFraction != 0)
                    batch_x = bitWiseMul(batch_x,(rand(curBatchSize,train_x.columns())>m_fInputZeroMaskedFraction));

                // targets picked with the same permuted indices as batch_x
                FMatrix batch_y(curBatchSize,train_y.columns());
                for(int r = 0; r < curBatchSize; ++r)//randperm()
                    row(batch_y,r) = row(train_y,iRandVec[j * opts.batchsize + r]);

                // forward pass (records batch loss), backprop, weight update
                L(i*ibatchNum+j,0) = nnff(batch_x,batch_y);
                nnbp();
                nnapplygrads();
                if(pFBNN)//push
                {
// 		    TMutex::scoped_lock lock;
// 		    lock.acquire(W_RWMutex);
// 		    lock.release();//writer lock tbb
		    // writer lock: hand this worker's gradients to the shared
		    // network and apply them there
		    boost::unique_lock<RWMutex> wlock(pFBNN->W_RWMutex);
                    pFBNN->set_m_odWs(m_odWs);
                    pFBNN->nnapplygrads();
		    wlock.unlock();
                }
// 	      std::cout << "end batch " << j << std::endl;
            }
            std::cout << std::endl;
	  });
	  std::cout << "elapsed time: " << elapsedTime << "s" << std::endl;
	  //loss calculate use nneval
	  if(valid_x.rows() == 0 || valid_y.rows() == 0){
	    nneval(loss, train_x, train_y);
	    std::cout << "Full-batch train mse = " << loss.train_error.back() << std::endl;
	  }
	  else{
	    nneval(loss, train_x, train_y, valid_x, valid_y);
	    std::cout << "Full-batch train mse = " << loss.train_error.back() << " , val mse = " << loss.valid_error.back() << std::endl;
	  }
	  std::cout << "epoch " << i+1 << " / " <<  opts.numpochs << " took " << elapsedTime << " seconds." << std::endl;
	  std::cout << "Mini-batch mean squared error on training set is " << columnMean(submatrix(L,i*ibatchNum,0UL,ibatchNum,L.columns())) << std::endl;      
	  // learning-rate decay after each epoch
	  m_iLearningRate *= m_fScalingLearningRate;    
	  
// 	  std::cout << "end numpochs " << i << std::endl;
      }

  }
/* Predicts the class of sample data_index: runs a forward pass and returns
 * the index of the strongest activation in the last layer. */
int nnpredict(NN* nn, int data_index, const double** test_x)
{
    const int last = nn->n - 2;   /* index of the output layer */
    nnff(nn, data_index, test_x);
    return indOfMaxVal_double(nn->layer[last].a, nn->layer[last].units);
}