operateLayer1(short* img_ptr, uint32_t w, uint32_t h) { uint32_t i; uint32_t j; short pixels = w*h; short kern_index; uint32_t local_l1_maps = maps_info_ptr->l1_maps[DNUM]; for (i = 0; i<local_l1_maps; i++) { if(DNUM<maps_info_ptr->mod_l1) kern_index = (DNUM*local_l1_maps)+i; else kern_index = (DNUM*local_l1_maps)+i+maps_info_ptr->mod_l1; Conv5x5(img_ptr,temp_img_ptr,w,h,&kernel5x5[kern_index][0]); for (j=0; j<pixels; j++) { temp_img_ptr[j] = ((temp_img_ptr[j] < 0) ? 0 : temp_img_ptr[j]); } #ifndef FULLY_CONNECTED maxpool(temp_img_ptr,(local_l1_maps_ptr+(i*l1_size)), w, h); #else maxpool(temp_img_ptr,temp1_ptr, w, h); DSP_add16_shift(local_l1_maps_ptr,temp1_ptr,local_l1_maps_ptr,(w*h/4),1); #endif } }
/// Forward pass of the max-pooling layer.
///
/// Resets `a`, `images` and `indexes` to fresh storage sized from `pa.n_rows`,
/// then for every row `i` and every `j` in [0, pnfilter) builds an image with
/// `toimage`, zero-pads it into `images.slice(i)`, and for every `k` in
/// [0, nfilter) runs `maxpool` on that slice, adding the pooled output into
/// columns [k*outputsize, (k+1)*outputsize) of row `i` of `a` and storing the
/// returned index matrix in row `i` of `indexes`.
///
/// @param pa input matrix; one row per sample.
/// @return `funcop(a, actfunc.act)`.
mat MaxPoolLayer::forwardprop(const mat& pa)
{
    a = mat(pa.n_rows, outputsize * nfilter, arma::fill::zeros);
    images = cube(inputheight + 2*padding, inputwidth + 2*padding, pa.n_rows);
    indexes = mat(pa.n_rows, outputsize*2);

    for (uint32_t i = 0 ; i < pa.n_rows ; ++i)
    {
        // The unused per-row copy `mat sample = pa.row(i)` was removed here:
        // it allocated on every iteration and was never read.
        // NOTE(review): toimage() is given the whole `pa` and the filter
        // index `j`, not row `i` -- confirm it selects the right sample.
        for (int j = 0 ; j < pnfilter ; ++j)
        {
            const mat img = toimage(pa, j, inputwidth, inputheight);
            // NOTE(review): slice i (and row i of `indexes` below) is
            // overwritten for every j, so only the last j is retained.
            images.slice(i) = addzeropadding(img);

#ifdef HAVE_OPENMP
#pragma omp parallel for default(none) shared(j,i) if (nfilter >= 16)
#endif
            for (int k = 0 ; k < nfilter ; ++k)
            {
                mat partialoutput;
                mat index;
                maxpool(images.slice(i), partialoutput, index);

                // Each k owns a disjoint column range of `a`.
                a.submat(i, k*outputsize, i, (k+1)*outputsize-1) += partialoutput;

                // Every k writes the SAME row of `indexes`; serialize the
                // store so concurrent iterations do not race on it.
#ifdef HAVE_OPENMP
#pragma omp critical
#endif
                indexes.submat(i, 0, i, outputsize*2-1) = index;
            }
        }
    }

    return funcop(a, actfunc.act);
}
operateLayer2(uint32_t w, uint32_t h) { uint8_t i,k; uint32_t j; short* ptr1 = NULL; short* ptr2 = NULL; short pixels = w*h; short kern_index = 0; uint32_t local_l2_maps = maps_info_ptr->l2_maps[DNUM]; for (i = 0; i<local_l2_maps; i++) { if(DNUM<maps_info_ptr->mod_l2) kern_index = (DNUM*local_l2_maps)+i; else kern_index = (DNUM*local_l2_maps)+i+maps_info_ptr->mod_l2; #ifndef FULLY_CONNECTED uint8_t selection; selection = rand() % L1_MAPS; ptr1 = (local_l1_maps_ptr+(selection*l1_size)); Conv5x5(ptr1,temp2_ptr, w, h, &kernel5x5[kern_index][0]); for(k=1;k<L1_L2_CONNECTIONS;k=k+2) { selection = rand() % L1_MAPS; ptr2 = (local_l1_maps_ptr+(selection*l1_size)); Conv5x5(ptr2,temp1_ptr, w, h, &kernel5x5[kern_index][0]); DSP_add16_shift(temp2_ptr,temp1_ptr,layer1_ptr,pixels,1); selection = rand() % L1_MAPS; ptr2 = (local_l1_maps_ptr+(selection*l1_size)); Conv5x5(ptr2,temp1_ptr, w, h, &kernel5x5[kern_index][0]); DSP_add16_shift(temp1_ptr,layer1_ptr,temp2_ptr,pixels,1); } #else ptr1 = local_l1_maps_ptr; Conv5x5(ptr1,temp2_ptr, w, h, &kernel5x5[kern_index][0]); for(k=1;k<L1_MAPS;k=k+2) { ptr2 = local_l1_maps_ptr + (k*l1_size); Conv5x5(ptr2,temp1_ptr, w, h, &kernel5x5[kern_index][0]); DSP_add16_shift(temp2_ptr,temp1_ptr,layer1_ptr,pixels,1); ptr2 = ptr2 + l1_size; Conv5x5(ptr2,temp1_ptr, w, h, &kernel5x5[kern_index][0]); DSP_add16_shift(temp1_ptr,layer1_ptr,temp2_ptr,pixels,1); } #endif for (j=0; j<pixels; j++) { temp2_ptr[j] = ((temp2_ptr[j] < 0) ? 0 : temp2_ptr[j]); } maxpool(temp2_ptr,(local_l2_maps_ptr+(i*l2_size)), w, h); } }
operateLayer1( uint32_t w, uint32_t h) { uint32_t i; uint32_t j; short pixels = w*h; for (i = 0; i<L1_MAPS; i++) { Conv5x5(maps_info_1.img_ptr,temp_img_ptr,w,h,&kernel5x5[i][0]); for (j=0; j<pixels; j++) { temp_img_ptr[j] = ((temp_img_ptr[j] < 0) ? 0 : temp_img_ptr[j]); } #ifndef FULLY_CONNECTED maxpool(temp_img_ptr,(maps_info_1.l1_maps_ptr+(i*l1_size)), w, h); #else maxpool(temp_img_ptr,temp1_ptr, w, h); DSP_add16_shift(maps_info_1.l1_maps_ptr,temp1_ptr,maps_info_1.l1_maps_ptr,(w*h/4),1); #endif } }
operateLayer2(uint32_t w, uint32_t h) { uint8_t i,k; uint32_t j; short* ptr1 = NULL; short pixels = w*h; short kern_index = 0; uint32_t local_l2_maps = maps_info_ptr->l2_maps[DNUM]; #ifdef FULLY_CONNECTED ptr1 = layer1_ptr; for(k=0;k<NUM_CORES;k++) DSP_add16_shift((short*)maps_info_ptr->l1_maps_ptr[k],ptr1,ptr1,pixels,1); #endif for (i = 0; i<local_l2_maps; i++) { if(DNUM<maps_info_ptr->mod_l2) kern_index = (DNUM*local_l2_maps)+i; else kern_index = (DNUM*local_l2_maps)+i+maps_info_ptr->mod_l2; #ifndef FULLY_CONNECTED uint8_t selection; short* global_l1_map_start_ptr = (short*)maps_info_ptr->l1_maps_ptr[0]; short* ptr2 = NULL; selection = rand() % L1_MAPS; ptr1 = (global_l1_map_start_ptr+(selection*l1_size)); for(k=0;k<L1_L2_CONNECTIONS/2;k++) { selection = rand() % L1_MAPS; ptr2 = (global_l1_map_start_ptr+(selection*l1_size)); DSP_add16_shift(ptr1,ptr2,temp1_ptr,pixels,1); selection = rand() % L1_MAPS; ptr2 = (global_l1_map_start_ptr+(selection*l1_size)); DSP_add16_shift(temp1_ptr,ptr2,ptr1,pixels,1); } #endif Conv5x5(ptr1,layer1_ptr, w, h, &kernel5x5[kern_index][0]); for (j=0; j<pixels; j++) { layer1_ptr[j] = ((layer1_ptr[j] < 0) ? 0 : layer1_ptr[j]); } maxpool(layer1_ptr,(local_l2_maps_ptr+(i*l2_size)), w, h); } }
operateLayer1(short* img_ptr, uint32_t w, uint32_t h) { uint32_t i; uint32_t j; short pixels = w*h; short kern_index; uint32_t local_l1_maps = maps_info_ptr->l1_maps[DNUM]; for (i = 0; i<local_l1_maps; i++) { if(DNUM<maps_info_ptr->mod_l1) kern_index = (DNUM*local_l1_maps)+i; else kern_index = (DNUM*local_l1_maps)+i+maps_info_ptr->mod_l1; Conv7x7(img_ptr,temp_img_ptr,w,h,&kernel7x7[kern_index][0]); for (j=0; j<pixels; j++) { temp_img_ptr[j] = ((temp_img_ptr[j] < 0) ? 0 : temp_img_ptr[j]); } maxpool(temp_img_ptr,(local_l1_maps_ptr+(i*l1_size)), w, h); } }
operateLayer2(uint32_t w, uint32_t h) { uint8_t i; uint32_t j; short pixels = w*h; for (i = 0; i<L2_MAPS; i++) { #ifndef FULLY_CONNECTED uint8_t selection; short* l1_maps_ptr = (short*)maps_info_1.l1_maps_ptr[0]; short* ptr2 = NULL; selection = rand() % L1_MAPS; ptr1 = (l1_maps_ptr+(selection*l1_size)); for(k=0;k<L1_L2_CONNECTIONS/2;k++) { selection = rand() % L1_MAPS; ptr2 = (l1_maps_ptr+(selection*l1_size)); DSP_add16_shift(ptr1,ptr2,temp1_ptr,pixels,1); selection = rand() % L1_MAPS; ptr2 = (l1_maps_ptr+(selection*l1_size)); DSP_add16_shift(temp1_ptr,ptr2,ptr1,pixels,1); } #endif Conv5x5(maps_info_1.l1_maps_ptr,layer1_ptr, w, h, &kernel5x5[i][0]); for (j=0; j<pixels; j++) { layer1_ptr[j] = ((layer1_ptr[j] < 0) ? 0 : layer1_ptr[j]); } maxpool(layer1_ptr,(maps_info_1.l2_maps_ptr+(i*l2_size)), w, h); } }