int main() { int numClockCycles = 0; Xil_ICacheEnable(); Xil_DCacheEnable(); print("---Entering main---\n\r"); print("---Trial Name \t Trial # \t Clock Cycles---\n\r"); int i = 0; // Extra Method contains an interrupt routine which is set to go off at timed intervals extra_method(); for( i=0; i < NUMBER_OF_TRIALS; i++) { //TIMER RESET CODE //Turn off the timer XTmrCtr_SetControlStatusReg(XPAR_TMRCTR_0_BASEADDR, 1, 0); //Put a zero in the load register XTmrCtr_SetLoadReg(XPAR_TMRCTR_0_BASEADDR, 1, 0); //Copy the load register into the counter register XTmrCtr_SetControlStatusReg(XPAR_TMRCTR_0_BASEADDR, 1, XTC_CSR_LOAD_MASK); //Enable (start) the timer XTmrCtr_SetControlStatusReg(XPAR_TMRCTR_0_BASEADDR, 1, XTC_CSR_ENABLE_TMR_MASK); //END TIMER RESET CODE //blinkLED(int numberOfBlinks); //offLED(); //onLED(); //sevenSegment(); //printLongerStrings(); // Write this function //printShortStrings(); // Write this function //printfShortStrings(); // Write this function //xil_printfShortStrings(); // Write this function //intAddAndMultiply(); // Write this function //floatAddAndMultiply(); // Write this function numClockCycles = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, 1); xil_printf("print \t%d\t%d\n", i,numClockCycles ); } return 0; }
// 这里InputData是图像数据,inputData[r][c],r行c列,这里跟各权重模板是一致的 void cnnff(CNN* cnn,float** inputData) { // 第一层的传播 int i, j, r, c; // 第一层卷积层输出数据(C1) nSize mapSize = {cnn->C1.mapSize, cnn->C1.mapSize}; nSize inSize = {cnn->C1.inputWidth, cnn->C1.inputHeight}; nSize outSize = {cnn->S2.inputWidth, cnn->S2.inputHeight}; float **wholeKernel = (float**)malloc(6 * sizeof(float*)); for (i = 0; i < 6; i++) wholeKernel[i] = (float*)malloc(25 * sizeof(float)); int cov1layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0); for(j = 0;j < (cnn->C1.inChannels);j++) { // 卷积 int m, n, k; int l; for (m = 0; m < 6; m++) { l = 0; for (k = 0; k < 5; k++) { for (n = 0; n < 5; n++) { wholeKernel[m][l] = cnn->C1.mapData[j][m][4-k][4-n]; l++; } } } float** mapout = cov_layer1_6(wholeKernel,mapSize,inputData,inSize,valid); // 求和 k = 0; for (i = 0; i < 6; i++) for (m = 0; m < 24; m++) for (n = 0; n < 24; n++) cnn->C1.v[i][m][n] += mapout[i][24*m+n]; } for(i = 0;i < (cnn->C1.outChannels);i++) for(r = 0;r < outSize.r;r++) for(c = 0;c < outSize.c;c++) // sigmoid function cnn->C1.y[i][r][c] = activation_Sigma(cnn->C1.v[i][r][c], cnn->C1.basicData[i]); // for(i=0;i<(cnn->C1.outChannels);i++) // { // for(j=0;j<(cnn->C1.inChannels);j++) // { // float** mapout=cov(cnn->C1.mapData[j][i],mapSize,inputData,inSize,valid); // addmat(cnn->C1.v[i],cnn->C1.v[i],outSize,mapout,outSize); // for(r=0;r<outSize.r;r++) // free(mapout[r]); // free(mapout); // } // for(r=0;r<outSize.r;r++) // for(c=0;c<outSize.c;c++) // cnn->C1.y[i][r][c]=activation_Sigma(cnn->C1.v[i][r][c],cnn->C1.basicData[i]); // } int cov1layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0); xil_printf("%d cycles spent on cov2\n", cov1layerend - cov1layerstart); // 第二层的输出传播S2,采样层 outSize.c = cnn->C3.inputWidth; outSize.r = cnn->C3.inputHeight; inSize.c = cnn->S2.inputWidth; inSize.r = cnn->S2.inputHeight; for(i = 0;i < (cnn->S2.outChannels);i++) { // pooling的类型是取平均 if(cnn->S2.poolType == AvePool) 
avgPooling(cnn->S2.y[i], outSize, cnn->C1.y[i], inSize, cnn->S2.mapSize); } /////////////////////////////////////////////////////////////////////////////////////////////////////////////// outSize.c = cnn->S4.inputWidth; outSize.r = cnn->S4.inputHeight; inSize.c = cnn->C3.inputWidth; inSize.r = cnn->C3.inputHeight; mapSize.c = cnn->C3.mapSize; mapSize.r = cnn->C3.mapSize; // for(i = 0;i < (cnn->C3.outChannels);i++) // 12 // { // for(j = 0;j < (cnn->C3.inChannels);j++) //6 // { // float** mapout = cov(cnn->C3.mapData[j][i], mapSize, cnn->S2.y[j], inSize, valid); // addmat(cnn->C3.v[i],cnn->C3.v[i], outSize, mapout, outSize); // } // //// for(j = 0;j < (cnn->C3.inChannels);j++) //// { //// mapKernel[j] = cnn->C3.mapData[j][i]; //// float** mapout = cov_layer3(cnn->C3.mapData[j][i], mapSize, cnn->S2.y[j], inSize, valid); //// //// addmat(cnn->C3.v[i],cnn->C3.v[i],outSize,mapout,outSize); //// } // // for(r = 0;r < outSize.r;r++) // for(c = 0;c < outSize.c;c++) // cnn->C3.y[i][r][c] = activation_Sigma(cnn->C3.v[i][r][c],cnn->C3.basicData[i]); // } // float **wholeKernel = (float**)malloc(6*sizeof(float*)); // for (i = 0; i < 6; i++) // wholeKernel[i] = (float*)malloc(25 * sizeof(float)); int cov2layerstart = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0); for(j = 0;j < (cnn->C3.inChannels);j++) { int m, n, k; int l; // convert 6 kernel to vector for (m = 0; m < 6; m++) { l = 0; for (k = 0; k < 5; k++) { for (n = 0; n < 5; n++) { wholeKernel[m][l] = cnn->C3.mapData[j][m][4-k][4-n]; l++; } } } float** mapout = cov_layer3_6(wholeKernel, mapSize, cnn->S2.y[j], inSize, valid); // add all mapout to the v for (i = 0; i < 6; i++) for (m = 0; m < 8; m++) for (n = 0; n < 8; n++) cnn->C3.v[i][m][n] += mapout[i][8*m+n]; for(i = 0; i < 6;i++) free(mapout[i]); free(mapout); for (m = 0; m < 6; m++) { l = 0; for (k = 0; k < 5; k++) { for (n = 0; n < 5; n++) { wholeKernel[m][l] = cnn->C3.mapData[j][m+6][4-k][4-n]; l++; } } } mapout = cov_layer3_6(wholeKernel, 
mapSize, cnn->S2.y[j], inSize, valid); for (; i < 12; i++) for (m = 0; m < 8; m++) for (n = 0; n < 8; n++) cnn->C3.v[i][m][n] += mapout[i-6][8*m+n]; for(i = 0; i < 6;i++) free(mapout[i]); free(mapout); } for (i = 0; i < 6; i++) free(wholeKernel[i]); free(wholeKernel); for (i=0; i < cnn->C3.outChannels; i++) for(r=0;r<outSize.r;r++) for(c=0;c<outSize.c;c++) cnn->C3.y[i][r][c]=activation_Sigma(cnn->C3.v[i][r][c],cnn->C3.basicData[i]); //////////////////////////////////////////////////////////////////////////////////////////////////// int cov2layerend = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, TIMER_COUNTER_0); xil_printf("%d cycles spent on cov2\n", cov2layerend - cov2layerstart); // 第四层的Pooling层 inSize.c=cnn->S4.inputWidth; inSize.r=cnn->S4.inputHeight; outSize.c=inSize.c/cnn->S4.mapSize; outSize.r=inSize.r/cnn->S4.mapSize; for(i=0;i<(cnn->S4.outChannels);i++) { if(cnn->S4.poolType == AvePool) avgPooling(cnn->S4.y[i],outSize,cnn->C3.y[i],inSize,cnn->S4.mapSize); } // 输出层O5的处理 // 首先需要将前面的多维输出展开成一维向量 float O5inData[192]; for(i = 0;i < (cnn->S4.outChannels);i++) for(r = 0;r < outSize.r;r++) for(c = 0;c < outSize.c;c++) O5inData[i*outSize.r*outSize.c+r*outSize.c+c]=cnn->S4.y[i][r][c]; nSize nnSize = {cnn->O5.inputNum, cnn->O5.outputNum}; nnff(cnn->O5.v, O5inData, cnn->O5.wData, cnn->O5.basicData, nnSize); // 计算每一个数字的概率 for(i = 0;i < cnn->O5.outputNum;i++) cnn->O5.y[i] = activation_Sigma(cnn->O5.v[i],cnn->O5.basicData[i]); }
/*
 * Reads timer counter 1 of the timer at XPAR_TMRCTR_0_BASEADDR and returns
 * the value as an int.
 */
int counter_method()
{
    int ticks = XTmrCtr_GetTimerCounterReg(XPAR_TMRCTR_0_BASEADDR, 1);

    return ticks;
}