/* Trains the network with mini-batch SGD: zero the accumulated weight
 * deltas, run feed-forward / back-propagation over each batch, apply
 * the gradients, then decay the learning rate after every epoch. */
void nntrain(NN* nn, int numdata, const double** train_x, const double** train_y,
             int numepochs, int batchsize)
{
    int numbatches = numdata / batchsize;
    int iter, b, i, k;

    for (iter = 0; iter < numepochs; iter++) {
        for (b = 0; b < numbatches; b++) {
            printf("Training NN %d / %d\tbatch:%d / %d\n",
                   iter + 1, numepochs, b + 1, numbatches);

            /* reset the accumulated weight deltas for every layer */
            set2DarrayZero(nn->layer[0].adw, nn->layer[0].units, nn->inputUnits);
            for (k = 1; k < nn->n - 1; k++)
                set2DarrayZero(nn->layer[k].adw, nn->layer[k].units,
                               nn->layer[k - 1].units);

            for (i = 0; i < batchsize; i++) {
                nnff(nn, b * batchsize + i, train_x);
                nnbp(nn, b * batchsize + i, train_x, train_y);
            }
            nnapplygrads(nn, batchsize);
            // double errorRate = nneval(nn, batchsize,
            //         (const double**)&train_x[b*batchsize],
            //         (const double**)&train_y[b*batchsize]);
            // printf("error rate=%f\n", errorRate);
        }
        double errorRate = nneval(nn, numdata, (const double**)train_x,
                                  (const double**)train_y);
        printf("Full-batch train error rate=%f\n", errorRate);
        nn->learningRate *= nn->scaling_learningRate;
    }
}
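/* nntrain relies on a small array helper, set2DarrayZero, that is not
 * shown here. A minimal sketch consistent with the rows/cols calls
 * above is given for reference (requires <string.h>); the project's
 * own implementation may differ. */
#include <string.h>

void set2DarrayZero(double** arr, int rows, int cols)
{
    int r;
    for (r = 0; r < rows; r++)
        memset(arr[r], 0, cols * sizeof(double));
}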
/* Greedy layer-wise pre-training of a stack of autoencoders: the first
 * AE is trained on the raw input; each subsequent AE is trained on the
 * hidden-layer activations of the previous one. */
void saetrain(NN** saes, int numsaes, int numdata, const double** train_x,
              int numepochs, int batchsize)
{
    int i, j;
    double **train_data = (double**)train_x;
    double **new_train_data = NULL;

    for (i = 0; i < numsaes; i++) {
        printf("Training AE %d / %d\n", i + 1, numsaes);
        if (i == 0) {
            nntrain(saes[i], numdata, (const double**)train_data,
                    (const double**)train_data, numepochs, batchsize);
        } else {
            /* generate training data for the next AE: feed every sample
             * through the previous AE and record its hidden activations */
            new_train_data = create2Darray(numdata, saes[i-1]->layer[0].units);
            for (j = 0; j < numdata; j++) {
                nnff(saes[i-1], j, (const double**)train_data);
                memcpy(new_train_data[j], saes[i-1]->layer[0].a,
                       saes[i-1]->layer[0].units * sizeof(double));
            }
            nntrain(saes[i], numdata, (const double**)new_train_data,
                    (const double**)new_train_data, numepochs, batchsize);
            if (i > 1)                   /* free the previous round's activations */
                free2Darray(train_data, numdata);
            if (i == numsaes - 1)
                free2Darray(new_train_data, numdata);
            train_data = new_train_data;
        }
    }
}
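/* saetrain depends on create2Darray/free2Darray for the intermediate
 * activation matrices. A minimal sketch consistent with the calls
 * above (allocate rows x cols doubles, free exactly rows rows) might
 * look like this (requires <stdlib.h>); the project's own versions may
 * differ, e.g. using a single contiguous block. */
#include <stdlib.h>

double** create2Darray(int rows, int cols)
{
    int r;
    double** arr = (double**)malloc(rows * sizeof(double*));
    for (r = 0; r < rows; r++)
        arr[r] = (double*)calloc(cols, sizeof(double));
    return arr;
}

void free2Darray(double** arr, int rows)
{
    int r;
    for (r = 0; r < rows; r++)
        free(arr[r]);
    free(arr);
}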
//predicts labels: forward-propagate x and take the index of the
//maximum output unit in each row
void ff::FBNN::nnpredict(const FMatrix& x, const FMatrix& y, FColumn& labels)
{
//   std::cout << "start nnpredict" << std::endl;
    m_fTesting = true;
    nnff(x, zeros(x.rows(), m_oArch[m_iN - 1]));
    m_fTesting = false;
    labels = rowMaxIndexes(*m_oAs[m_iN - 1]);
//   std::cout << "end nnpredict" << std::endl;
}
//evaluates performance of neural network
void ff::FBNN::nneval(Loss & loss, const FMatrix& train_x, const FMatrix& train_y,
                      const FMatrix& valid_x, const FMatrix& valid_y)
{
//   std::cout << "start nneval" << std::endl;
    m_fTesting = true;
    //training performance
    loss.train_error.push_back(nnff(train_x, train_y));
    //validation performance
    if(valid_x.rows() != 0 && valid_y.rows() != 0)
        loss.valid_error.push_back(nnff(valid_x, valid_y));
    m_fTesting = false;
    //calc misclassification rate if softmax
    if(m_strOutput == "softmax")
    {
        loss.train_error_fraction.push_back(nntest(train_x, train_y));
        if(valid_x.rows() != 0 && valid_y.rows() != 0)
            loss.valid_error_fraction.push_back(nntest(valid_x, valid_y));
    }
//   std::cout << "end nneval" << std::endl;
}
/* Forward pass of the CNN. inputData is the image, inputData[r][c]
 * with r rows and c columns, laid out consistently with the weight
 * kernels. Channel counts and map sizes (6/12 kernels, 5x5 maps,
 * 24x24 and 8x8 convolution outputs) are hard-coded for this
 * LeNet-style network. */
void cnnff(CNN* cnn, float** inputData)
{
    int i, j, r, c;

    /* ---- first layer: convolution layer C1 ---- */
    nSize mapSize = {cnn->C1.mapSize, cnn->C1.mapSize};
    nSize inSize  = {cnn->C1.inputWidth, cnn->C1.inputHeight};
    nSize outSize = {cnn->S2.inputWidth, cnn->S2.inputHeight};

    /* buffer for packing six 5x5 kernels (rotated 180 degrees for
     * convolution) into flat vectors for the batched routines */
    float **wholeKernel = (float**)malloc(6 * sizeof(float*));
    for (i = 0; i < 6; i++)
        wholeKernel[i] = (float*)malloc(25 * sizeof(float));

    int cov1layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
    for (j = 0; j < (cnn->C1.inChannels); j++) {
        int m, n, k, l;
        for (m = 0; m < 6; m++) {
            l = 0;
            for (k = 0; k < 5; k++)
                for (n = 0; n < 5; n++)
                    wholeKernel[m][l++] = cnn->C1.mapData[j][m][4-k][4-n];
        }
        /* convolve the input with all six kernels at once */
        float** mapout = cov_layer1_6(wholeKernel, mapSize, inputData, inSize, valid);
        /* accumulate the results into C1's pre-activations */
        for (i = 0; i < 6; i++)
            for (m = 0; m < 24; m++)
                for (n = 0; n < 24; n++)
                    cnn->C1.v[i][m][n] += mapout[i][24*m+n];
        /* free the convolution buffers, as is done for C3 below */
        for (i = 0; i < 6; i++)
            free(mapout[i]);
        free(mapout);
    }
    for (i = 0; i < (cnn->C1.outChannels); i++)
        for (r = 0; r < outSize.r; r++)
            for (c = 0; c < outSize.c; c++)
                /* sigmoid activation */
                cnn->C1.y[i][r][c] = activation_Sigma(cnn->C1.v[i][r][c], cnn->C1.basicData[i]);

    // reference (per-kernel) implementation, replaced by the batched version above:
    // for(i = 0; i < (cnn->C1.outChannels); i++)
    // {
    //     for(j = 0; j < (cnn->C1.inChannels); j++)
    //     {
    //         float** mapout = cov(cnn->C1.mapData[j][i], mapSize, inputData, inSize, valid);
    //         addmat(cnn->C1.v[i], cnn->C1.v[i], outSize, mapout, outSize);
    //         for(r = 0; r < outSize.r; r++)
    //             free(mapout[r]);
    //         free(mapout);
    //     }
    //     for(r = 0; r < outSize.r; r++)
    //         for(c = 0; c < outSize.c; c++)
    //             cnn->C1.y[i][r][c] = activation_Sigma(cnn->C1.v[i][r][c], cnn->C1.basicData[i]);
    // }

    int cov1layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
    xil_printf("%d cycles spent on cov1\n", cov1layerend - cov1layerstart);

    /* ---- second layer: subsampling (pooling) layer S2 ---- */
    outSize.c = cnn->C3.inputWidth;
    outSize.r = cnn->C3.inputHeight;
    inSize.c  = cnn->S2.inputWidth;
    inSize.r  = cnn->S2.inputHeight;
    for (i = 0; i < (cnn->S2.outChannels); i++) {
        /* pooling type is averaging */
        if (cnn->S2.poolType == AvePool)
            avgPooling(cnn->S2.y[i], outSize, cnn->C1.y[i], inSize, cnn->S2.mapSize);
    }

    /* ---- third layer: convolution layer C3 ---- */
    outSize.c = cnn->S4.inputWidth;
    outSize.r = cnn->S4.inputHeight;
    inSize.c  = cnn->C3.inputWidth;
    inSize.r  = cnn->C3.inputHeight;
    mapSize.c = cnn->C3.mapSize;
    mapSize.r = cnn->C3.mapSize;

    // reference (per-kernel) implementation, replaced by the batched version below:
    // for(i = 0; i < (cnn->C3.outChannels); i++)  // 12
    // {
    //     for(j = 0; j < (cnn->C3.inChannels); j++)  // 6
    //     {
    //         float** mapout = cov(cnn->C3.mapData[j][i], mapSize, cnn->S2.y[j], inSize, valid);
    //         addmat(cnn->C3.v[i], cnn->C3.v[i], outSize, mapout, outSize);
    //     }
    //     for(r = 0; r < outSize.r; r++)
    //         for(c = 0; c < outSize.c; c++)
    //             cnn->C3.y[i][r][c] = activation_Sigma(cnn->C3.v[i][r][c], cnn->C3.basicData[i]);
    // }

    int cov2layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
    for (j = 0; j < (cnn->C3.inChannels); j++) {
        int m, n, k, l;
        /* pack the first six kernels for this input channel */
        for (m = 0; m < 6; m++) {
            l = 0;
            for (k = 0; k < 5; k++)
                for (n = 0; n < 5; n++)
                    wholeKernel[m][l++] = cnn->C3.mapData[j][m][4-k][4-n];
        }
        float** mapout = cov_layer3_6(wholeKernel, mapSize, cnn->S2.y[j], inSize, valid);
        /* accumulate into the first six output maps */
        for (i = 0; i < 6; i++)
            for (m = 0; m < 8; m++)
                for (n = 0; n < 8; n++)
                    cnn->C3.v[i][m][n] += mapout[i][8*m+n];
        for (i = 0; i < 6; i++)
            free(mapout[i]);
        free(mapout);

        /* pack the remaining six kernels and accumulate into maps 6..11 */
        for (m = 0; m < 6; m++) {
            l = 0;
            for (k = 0; k < 5; k++)
                for (n = 0; n < 5; n++)
                    wholeKernel[m][l++] = cnn->C3.mapData[j][m+6][4-k][4-n];
        }
        mapout = cov_layer3_6(wholeKernel, mapSize, cnn->S2.y[j], inSize, valid);
        for (i = 6; i < 12; i++)
            for (m = 0; m < 8; m++)
                for (n = 0; n < 8; n++)
                    cnn->C3.v[i][m][n] += mapout[i-6][8*m+n];
        for (i = 0; i < 6; i++)
            free(mapout[i]);
        free(mapout);
    }
    for (i = 0; i < 6; i++)
        free(wholeKernel[i]);
    free(wholeKernel);

    for (i = 0; i < cnn->C3.outChannels; i++)
        for (r = 0; r < outSize.r; r++)
            for (c = 0; c < outSize.c; c++)
                cnn->C3.y[i][r][c] = activation_Sigma(cnn->C3.v[i][r][c], cnn->C3.basicData[i]);

    int cov2layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0);
    xil_printf("%d cycles spent on cov2\n", cov2layerend - cov2layerstart);

    /* ---- fourth layer: pooling layer S4 ---- */
    inSize.c  = cnn->S4.inputWidth;
    inSize.r  = cnn->S4.inputHeight;
    outSize.c = inSize.c / cnn->S4.mapSize;
    outSize.r = inSize.r / cnn->S4.mapSize;
    for (i = 0; i < (cnn->S4.outChannels); i++) {
        if (cnn->S4.poolType == AvePool)
            avgPooling(cnn->S4.y[i], outSize, cnn->C3.y[i], inSize, cnn->S4.mapSize);
    }

    /* ---- output layer O5 ---- */
    /* first flatten the multi-dimensional feature maps into a 1-D vector */
    float O5inData[192];
    for (i = 0; i < (cnn->S4.outChannels); i++)
        for (r = 0; r < outSize.r; r++)
            for (c = 0; c < outSize.c; c++)
                O5inData[i*outSize.r*outSize.c + r*outSize.c + c] = cnn->S4.y[i][r][c];

    nSize nnSize = {cnn->O5.inputNum, cnn->O5.outputNum};
    nnff(cnn->O5.v, O5inData, cnn->O5.wData, cnn->O5.basicData, nnSize);
    /* compute the probability for each digit */
    for (i = 0; i < cnn->O5.outputNum; i++)
        cnn->O5.y[i] = activation_Sigma(cnn->O5.v[i], cnn->O5.basicData[i]);
}
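/* For reference, the average pooling used by S2 and S4 above replaces
 * each non-overlapping mapSize x mapSize window with its mean. This is
 * a minimal sketch, assuming nSize exposes r/c fields and the argument
 * order matches the calls in cnnff; the project's actual avgPooling
 * may be implemented differently. */
void avgPooling(float** output, nSize outputSize, float** input,
                nSize inputSize, int mapSize)
{
    int i, j, m, n;
    float scale = 1.0f / (mapSize * mapSize);
    (void)inputSize; /* kept only for signature compatibility */
    for (i = 0; i < outputSize.r; i++)
        for (j = 0; j < outputSize.c; j++) {
            float sum = 0.0f;
            for (m = 0; m < mapSize; m++)
                for (n = 0; n < mapSize; n++)
                    sum += input[i*mapSize + m][j*mapSize + n];
            output[i][j] = sum * scale;
        }
}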
//trains a neural net
void FBNN::train(const FMatrix & train_x, const FMatrix & train_y, const Opts & opts,
                 const FMatrix & valid_x, const FMatrix & valid_y, const FBNN_ptr pFBNN)
{
    int ibatchNum = train_x.rows() / opts.batchsize + (train_x.rows() % opts.batchsize != 0);
    FMatrix L = zeros(opts.numpochs * ibatchNum, 1);
    m_oLp = std::make_shared<FMatrix>(L);
    Loss loss;
//   std::cout << "numpochs = " << opts.numpochs << std::endl;
    for(int i = 0; i < opts.numpochs; ++i)
    {
        std::cout << "start numpochs " << i << std::endl;
        int elapsedTime = count_elapse_second([&train_x,&train_y,&L,&opts,i,pFBNN,ibatchNum,this]{
            std::vector<int> iRandVec;
            randperm(train_x.rows(),iRandVec);
            std::cout << "start batch: ";
            for(int j = 0; j < ibatchNum; ++j)
            {
                std::cout << " " << j;
                if(pFBNN)//pull the shared weights (reader lock)
                {
                    boost::shared_lock<RWMutex> rlock(pFBNN->W_RWMutex);
                    set_m_oWs(pFBNN->get_m_oWs());
                    if(m_fMomentum > 0)
                        set_m_oVWs(pFBNN->get_m_oVWs());
                    rlock.unlock();
                }
                //the last batch may be smaller than opts.batchsize
                int curBatchSize = opts.batchsize;
                if(j == ibatchNum - 1 && train_x.rows() % opts.batchsize != 0)
                    curBatchSize = train_x.rows() % opts.batchsize;
                //assemble the shuffled mini-batch via randperm()
                FMatrix batch_x(curBatchSize,train_x.columns());
                for(int r = 0; r < curBatchSize; ++r)
                    row(batch_x,r) = row(train_x,iRandVec[j * opts.batchsize + r]);
                //Add noise to input (for use in denoising autoencoder)
                if(m_fInputZeroMaskedFraction != 0)
                    batch_x = bitWiseMul(batch_x,(rand(curBatchSize,train_x.columns())>m_fInputZeroMaskedFraction));
                FMatrix batch_y(curBatchSize,train_y.columns());
                for(int r = 0; r < curBatchSize; ++r)
                    row(batch_y,r) = row(train_y,iRandVec[j * opts.batchsize + r]);
                L(i*ibatchNum+j,0) = nnff(batch_x,batch_y);
                nnbp();
                nnapplygrads();
                if(pFBNN)//push the local gradients (writer lock)
                {
                    boost::unique_lock<RWMutex> wlock(pFBNN->W_RWMutex);
                    pFBNN->set_m_odWs(m_odWs);
                    pFBNN->nnapplygrads();
                    wlock.unlock();
                }
//               std::cout << "end batch " << j << std::endl;
            }
            std::cout << std::endl;
        });
        std::cout << "elapsed time: " << elapsedTime << "s" << std::endl;
        //loss calculation uses nneval
        if(valid_x.rows() == 0 || valid_y.rows() == 0){
            nneval(loss, train_x, train_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back() << std::endl;
        }
        else{
            nneval(loss, train_x, train_y, valid_x, valid_y);
            std::cout << "Full-batch train mse = " << loss.train_error.back()
                      << " , val mse = " << loss.valid_error.back() << std::endl;
        }
        std::cout << "epoch " << i+1 << " / " << opts.numpochs << " took "
                  << elapsedTime << " seconds." << std::endl;
        std::cout << "Mini-batch mean squared error on training set is "
                  << columnMean(submatrix(L,i*ibatchNum,0UL,ibatchNum,L.columns())) << std::endl;
        m_iLearningRate *= m_fScalingLearningRate;
//       std::cout << "end numpochs " << i << std::endl;
    }
}
/* Classifies one sample: forward-propagate it and return the index of
 * the maximum activation in the output layer. */
int nnpredict(NN* nn, int data_index, const double** test_x)
{
    nnff(nn, data_index, test_x);
    return indOfMaxVal_double(nn->layer[nn->n - 2].a,
                              nn->layer[nn->n - 2].units); /* the last layer's a */
}
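/* indOfMaxVal_double is the small argmax helper assumed by nnpredict.
 * A minimal sketch returning the index of the largest element; the
 * project's own version may handle ties or empty arrays differently. */
int indOfMaxVal_double(const double* arr, int len)
{
    int i, maxIdx = 0;
    for (i = 1; i < len; i++)
        if (arr[i] > arr[maxIdx])
            maxIdx = i;
    return maxIdx;
}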