/*
 * CPU forward pass of a connected layer.
 *
 * Zeroes l.output, accumulates the gemm product of state.input and l.weights
 * into it, optionally batch-normalizes the result, adds l.biases to every
 * batch row and finally applies l.activation in place.
 *
 * l     - layer descriptor (passed by value; the buffers it points to are
 *         written: output, and in training mode mean, variance,
 *         rolling_mean, rolling_variance, x and x_norm).
 * state - provides the input buffer and the train flag.
 */
void forward_connected_layer(connected_layer l, network_state state)
{
    const int batch = l.batch;
    const int in_count = l.inputs;
    const int out_count = l.outputs;
    const int total = l.outputs*l.batch;

    float *input = state.input;
    float *weights = l.weights;
    float *output = l.output;

    fill_cpu(total, 0, output, 1);

    // gemm flags (0,1): presumably "input as-is, weights transposed" -- confirm
    // against the gemm implementation.
    gemm(0, 1, batch, out_count, in_count, 1,
         input, in_count,
         weights, in_count,
         1,
         output, out_count);

    if (l.batch_normalize) {
        if (!state.train) {
            // Inference: normalize with the accumulated rolling statistics.
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, batch, out_count, 1);
        } else {
            // Training: statistics of the current batch.
            mean_cpu(l.output, batch, out_count, 1, l.mean);
            variance_cpu(l.output, l.mean, batch, out_count, 1, l.variance);

            // scal + axpy pairs: presumably rolling = .95*rolling + .05*current.
            scal_cpu(out_count, .95, l.rolling_mean, 1);
            axpy_cpu(out_count, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(out_count, .95, l.rolling_variance, 1);
            axpy_cpu(out_count, .05, l.variance, 1, l.rolling_variance, 1);

            // Snapshot the output before (x) and after (x_norm) normalization.
            copy_cpu(total, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, batch, out_count, 1);
            copy_cpu(total, l.output, 1, l.x_norm, 1);
        }
        scale_bias(l.output, l.scales, batch, out_count, 1);
    }

    // Add the bias vector to each batch row of the output.
    for (int row = 0; row < batch; ++row) {
        axpy_cpu(out_count, 1, l.biases, 1, l.output + row*out_count, 1);
    }

    activate_array(l.output, total, l.activation);
}
/*
 * CPU forward pass of batch normalization.
 *
 * For a BATCHNORM layer the input is first copied into l.output; for other
 * layer types l.output is normalized as it stands. A CONNECTED layer is
 * treated as l.outputs channels with a spatial size of 1. After
 * normalization the output is passed through scale_bias (l.scales) and
 * add_bias (l.biases).
 *
 * l     - layer descriptor (by value; the buffers it points to are written).
 * state - provides the input buffer and the train flag.
 */
void forward_batchnorm_layer(layer l, network_state state)
{
    const int total = l.outputs*l.batch;
    int channels;
    int spatial;

    if (l.type == BATCHNORM) {
        copy_cpu(total, state.input, 1, l.output, 1);
    }

    if (l.type == CONNECTED) {
        channels = l.outputs;
        spatial = 1;
    } else {
        channels = l.out_c;
        spatial = l.out_h*l.out_w;
    }

    if (!state.train) {
        // Inference: use the accumulated rolling statistics.
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, channels, spatial);
    } else {
        // Training: statistics of the current batch.
        mean_cpu(l.output, l.batch, channels, spatial, l.mean);
        variance_cpu(l.output, l.mean, l.batch, channels, spatial, l.variance);

        // scal + axpy pairs: presumably rolling = .99*rolling + .01*current.
        scal_cpu(channels, .99, l.rolling_mean, 1);
        axpy_cpu(channels, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(channels, .99, l.rolling_variance, 1);
        axpy_cpu(channels, .01, l.variance, 1, l.rolling_variance, 1);

        // Snapshot the output before (x) and after (x_norm) normalization.
        copy_cpu(total, l.output, 1, l.x, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, channels, spatial);
        copy_cpu(total, l.output, 1, l.x_norm, 1);
    }

    scale_bias(l.output, l.scales, l.batch, channels, spatial);
    add_bias(l.output, l.biases, l.batch, channels, spatial);
}
/*
 * CPU forward pass of a convolutional layer.
 *
 * Zeroes l.output, then for every batch item unrolls the input with
 * im2col_cpu into l.col_image and accumulates the gemm product of l.filters
 * and that buffer into the item's slice of l.output. Optional batch
 * normalization follows, then add_bias (l.biases) and the activation.
 *
 * l     - layer descriptor (by value; the buffers it points to are written).
 * state - provides the input buffer and the train flag.
 */
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    const int out_h = convolutional_out_height(l);
    const int out_w = convolutional_out_width(l);

    const int filters = l.n;                    // gemm M
    const int patch = l.size*l.size*l.c;        // gemm K
    const int out_spatial = out_h*out_w;        // gemm N

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    float *in = state.input;
    float *out = l.output;
    for (int item = 0; item < l.batch; ++item) {
        im2col_cpu(in, l.c, l.h, l.w, l.size, l.stride, l.pad, l.col_image);
        gemm(0, 0, filters, out_spatial, patch, 1,
             l.filters, patch,
             l.col_image, out_spatial,
             1,
             out, out_spatial);
        // Advance to the next item's output slice and input image.
        out += out_spatial*filters;
        in += l.c*l.h*l.w;
    }

    if (l.batch_normalize) {
        // The normalization calls use the dimensions stored in the layer
        // (l.out_h, l.out_w); scale_bias/add_bias below use the locally
        // computed out_h/out_w.
        if (!state.train) {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        } else {
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_spatial);
    }

    add_bias(l.output, l.biases, l.batch, l.n, out_spatial);
    activate_array(l.output, filters*out_spatial*l.batch, l.activation);
}
void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num) { network *net = load_network(cfgfile, weightfile, 0); set_batch_network(net, 1); srand(2222222); list *options = read_data_cfg(datacfg); char *name_list = option_find_str(options, "names", 0); if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list"); int top = option_find_int(options, "top", 1); int i = 0; char **names = get_labels(name_list); clock_t time; int *indexes = calloc(top, sizeof(int)); char buff[256]; char *input = buff; while(1){ if(filename){ strncpy(input, filename, 256); }else{ printf("Enter Image Path: "); fflush(stdout); input = fgets(input, 256, stdin); if(!input) return; strtok(input, "\n"); } image orig = load_image_color(input, 0, 0); image r = resize_min(orig, 256); image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224); float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742}; float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583}; float var[3]; var[0] = std[0]*std[0]; var[1] = std[1]*std[1]; var[2] = std[2]*std[2]; normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h); float *X = im.data; time=clock(); float *predictions = network_predict(net, X); layer l = net->layers[layer_num]; for(i = 0; i < l.c; ++i){ if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]); } #ifdef GPU cuda_pull_array(l.output_gpu, l.output, l.outputs); #endif for(i = 0; i < l.outputs; ++i){ printf("%f\n", l.output[i]); } /* printf("\n\nWeights\n"); for(i = 0; i < l.n*l.size*l.size*l.c; ++i){ printf("%f\n", l.filters[i]); } printf("\n\nBiases\n"); for(i = 0; i < l.n; ++i){ printf("%f\n", l.biases[i]); } */ top_predictions(net, top, indexes); printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); for(i = 0; i < top; ++i){ int index = indexes[i]; printf("%s: %f\n", names[index], predictions[index]); } free_image(im); if (filename) break; } }