/*
 * Write a convolutional layer's parameters to fp in the packed binary format.
 *
 * Data is written in this order:
 *   - l.n biases (float)
 *   - when l.batch_normalize is set: l.n scales, l.n rolling means and
 *     l.n rolling variances (float each)
 *   - for each of the l.n filters: one float holding the absolute value of
 *     the filter's first binarized weight, followed by size/8 bytes, where
 *     bit k of byte j is set when binarized weight j*8 + k is positive
 *     (size = l.c*l.size*l.size).
 *
 * When built with GPU and gpu_index >= 0, pull_convolutional_layer(l) is
 * called first (presumably to refresh the host-side copies -- confirm against
 * its definition).
 *
 * NOTE(review): only size/8 whole bytes are emitted per filter, so when size
 * is not a multiple of 8 the last size%8 weights are not stored. This is left
 * as is because the matching reader is not visible here and the on-disk
 * format must stay in sync with it.
 *
 * The function has no way to return an error, so a short write is reported
 * once on stderr; every write is still attempted.
 */
void save_convolutional_weights_binary(layer l, FILE *fp)
{
#ifdef GPU
    if (gpu_index >= 0) {
        pull_convolutional_layer(l);
    }
#endif
    int size = l.c*l.size*l.size;
    int i, j, k;
    size_t per_array = (size_t)l.n;
    size_t expected = 0;   /* items we asked fwrite to store */
    size_t written = 0;    /* items fwrite reported as stored */

    binarize_filters(l.filters, l.n, size, l.binary_filters);

    written += fwrite(l.biases, sizeof(float), per_array, fp);
    expected += per_array;
    if (l.batch_normalize) {
        written += fwrite(l.scales, sizeof(float), per_array, fp);
        written += fwrite(l.rolling_mean, sizeof(float), per_array, fp);
        written += fwrite(l.rolling_variance, sizeof(float), per_array, fp);
        expected += 3*per_array;
    }

    for (i = 0; i < l.n; ++i) {
        /* Magnitude shared by every weight of this binarized filter. */
        float mean = l.binary_filters[i*size];
        if (mean < 0) mean = -mean;
        written += fwrite(&mean, sizeof(float), 1, fp);
        expected += 1;

        /* Pack the sign of eight consecutive weights into one byte. */
        for (j = 0; j < size/8; ++j) {
            int index = i*size + j*8;
            unsigned char c = 0;
            for (k = 0; k < 8; ++k) {
                if (j*8 + k >= size) break;
                if (l.binary_filters[index + k] > 0) c = (unsigned char)(c | 1<<k);
            }
            written += fwrite(&c, sizeof(char), 1, fp);
            expected += 1;
        }
    }

    if (written != expected) {
        fprintf(stderr,
                "save_convolutional_weights_binary: short write (%lu of %lu items); "
                "weights file is incomplete\n",
                (unsigned long)written, (unsigned long)expected);
    }
}
/*
 * Read a convolutional layer's float parameters from fp into the buffers
 * referenced by l.
 *
 * Data is read in this order:
 *   - l.n biases
 *   - when l.batch_normalize is set and l.dontloadscales is not:
 *     l.n scales, l.n rolling means and l.n rolling variances
 *   - l.n*l.c*l.size*l.size filter weights
 *
 * After reading:
 *   - if l.flipped is set, the filters are passed through transpose_matrix()
 *     with dimensions (l.c*l.size*l.size, l.n);
 *   - if l.binary is set, the filters are binarized in place (binary layers
 *     are still stored on disk as full float weights on this path);
 *   - when built with GPU and gpu_index >= 0, push_convolutional_layer(l) is
 *     called.
 *
 * The function has no way to return an error, so a short read (truncated file
 * or a file that does not match the layer configuration) is reported once on
 * stderr; loading then continues with whatever was read, as before.
 */
void load_convolutional_weights(layer l, FILE *fp)
{
    int num = l.n*l.c*l.size*l.size;
    size_t per_array = (size_t)l.n;
    size_t expected = 0;   /* floats we asked fread for */
    size_t got = 0;        /* floats fread actually delivered */

    got += fread(l.biases, sizeof(float), per_array, fp);
    expected += per_array;
    if (l.batch_normalize && (!l.dontloadscales)) {
        got += fread(l.scales, sizeof(float), per_array, fp);
        got += fread(l.rolling_mean, sizeof(float), per_array, fp);
        got += fread(l.rolling_variance, sizeof(float), per_array, fp);
        expected += 3*per_array;
    }
    got += fread(l.filters, sizeof(float), (size_t)num, fp);
    expected += (size_t)num;

    if (got != expected) {
        fprintf(stderr,
                "load_convolutional_weights: short read (%lu of %lu floats); "
                "weights file may be truncated or may not match the layer configuration\n",
                (unsigned long)got, (unsigned long)expected);
    }

    if (l.flipped) {
        transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
    }
    if (l.binary) {
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.filters);
    }
#ifdef GPU
    if (gpu_index >= 0) {
        push_convolutional_layer(l);
    }
#endif
}
/*
 * CPU forward pass of a convolutional layer.
 *
 * Steps, in order:
 *   1. Zero l.output (l.outputs*l.batch floats).
 *   2. For an xnor layer that does not take the forward_xnor_layer() path
 *      (l.c not a multiple of 32, or AI2 is zero): binarize the filters into
 *      l.binary_filters, call swap_binary() on the local layer copy, binarize
 *      each batch item's input into l.binary_input and use that buffer as
 *      the input from then on.
 *   3. Either call forward_xnor_layer(), or for every batch item run
 *      im2col_cpu() into state.workspace followed by a gemm() that
 *      accumulates into the item's slice of l.output.
 *   4. Apply forward_batchnorm_layer() when l.batch_normalize is set, then
 *      add_bias() and activate_array().
 *   5. Call swap_binary() again when l.binary or l.xnor is set.
 *
 * Both l and state are received by value. Note that state.input is advanced
 * in place inside the gemm loop, so forward_batchnorm_layer() receives the
 * advanced pointer.
 */
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int b;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    /* xnor layer handled by the generic gemm path: binarize weights and inputs. */
    if (l.xnor && !(l.c%32 == 0 && AI2)) {
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
        swap_binary(&l);
        b = 0;
        while (b < l.batch) {
            binarize_input(state.input + b*l.inputs, l.c, l.h*l.w, l.binary_input + b*l.inputs);
            ++b;
        }
        state.input = l.binary_input;
    }

    /* GEMM dimensions: (m x k) filters times (k x n) unrolled input. */
    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;

    if (!(l.xnor && l.c%32 == 0 && AI2)) {
        float *weights = l.filters;
        float *columns = state.workspace;
        float *out = l.output;

        b = 0;
        while (b < l.batch) {
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, columns);
            gemm(0, 0, m, n, k, 1, weights, k, columns, n, 1, out, n);
            /* Step to the next batch item's output slice and input image. */
            out += n*m;
            state.input += l.c*l.h*l.w;
            ++b;
        }
    } else {
        forward_xnor_layer(l, state);
        printf("xnor\n");
    }

    if (l.batch_normalize) {
        forward_batchnorm_layer(l, state);
    }
    add_bias(l.output, l.biases, l.batch, l.n, n);
    activate_array(l.output, m*n*l.batch, l.activation);

    if (l.binary || l.xnor) {
        swap_binary(&l);
    }
}