void save_convolutional_weights_binary(layer l, FILE *fp) { #ifdef GPU if(gpu_index >= 0){ pull_convolutional_layer(l); } #endif binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); int size = l.c*l.size*l.size; int i, j, k; fwrite(l.biases, sizeof(float), l.n, fp); if (l.batch_normalize){ fwrite(l.scales, sizeof(float), l.n, fp); fwrite(l.rolling_mean, sizeof(float), l.n, fp); fwrite(l.rolling_variance, sizeof(float), l.n, fp); } for(i = 0; i < l.n; ++i){ float mean = l.binary_weights[i*size]; if(mean < 0) mean = -mean; fwrite(&mean, sizeof(float), 1, fp); for(j = 0; j < size/8; ++j){ int index = i*size + j*8; unsigned char c = 0; for(k = 0; k < 8; ++k){ if (j*8 + k >= size) break; if (l.binary_weights[index + k] > 0) c = (c | 1<<k); } fwrite(&c, sizeof(char), 1, fp); } } }
void forward_convolutional_layer(convolutional_layer l, network_state state) { int out_h = convolutional_out_height(l); int out_w = convolutional_out_width(l); int i; fill_cpu(l.outputs*l.batch, 0, l.output, 1); if(l.xnor){ binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); swap_binary(&l); binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input); state.input = l.binary_input; } int m = l.n; int k = l.size*l.size*l.c; int n = out_h*out_w; float *a = l.weights; float *b = state.workspace; float *c = l.output; for(i = 0; i < l.batch; ++i){ im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b); gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); c += n*m; state.input += l.c*l.h*l.w; } if(l.batch_normalize){ forward_batchnorm_layer(l, state); } add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w); activate_array(l.output, m*n*l.batch, l.activation); if(l.binary || l.xnor) swap_binary(&l); }
void forward_convolutional_layer(convolutional_layer l, network net) { int i, j; fill_cpu(l.outputs*l.batch, 0, l.output, 1); if(l.xnor){ binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights); swap_binary(&l); binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input); net.input = l.binary_input; } int m = l.n/l.groups; int k = l.size*l.size*l.c/l.groups; int n = l.out_w*l.out_h; for(i = 0; i < l.batch; ++i){ for(j = 0; j < l.groups; ++j){ float *a = l.weights + j*l.nweights/l.groups; float *b = net.workspace; float *c = l.output + (i*l.groups + j)*n*m; im2col_cpu(net.input + (i*l.groups + j)*l.c/l.groups*l.h*l.w, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, b); gemm(0,0,m,n,k,1,a,k,b,n,1,c,n); } } if(l.batch_normalize){ forward_batchnorm_layer(l, net); } else { add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w); } activate_array(l.output, l.outputs*l.batch, l.activation); if(l.binary || l.xnor) swap_binary(&l); }