/* Computes the posterior over the T senses of word `x` given `context`.
 *
 * For every context word, walks its Huffman path (terminated by a -1 code)
 * and adds the hierarchical-softmax log-likelihood of each sense k into
 * z[k].  The caller is expected to seed z before the call (presumably with
 * log-prior terms — NOTE(review): z is accumulated into, never cleared
 * here; confirm at call sites).  Finally z is normalized in place with a
 * max-shifted softmax, so on return z holds a probability distribution
 * over the T senses.
 *
 * In/Out          input/output embedding tables (M floats per vector)
 * M, T            embedding dimension, number of senses per word
 * x               id of the center word
 * context         ids of the surrounding words (context_length of them)
 * paths/codes     per-word Huffman tree: inner-node ids and 0/1 codes,
 *                 `length` entries per word, -1 terminated
 */
void update_z(
        float* In, float* Out, int M, int T, double* z,
        int32_t x, int32_t* context, int64_t context_length,
        int32_t* paths, int8_t* codes, int64_t length) {
    for (int ci = 0; ci < context_length; ++ci) {
        const int32_t word = context[ci];
        const int32_t* node = paths + word * length;  /* inner-node ids */
        const int8_t* bit = codes + word * length;    /* Huffman bits   */
        /* A code of -1 marks the end of this word's path. */
        for (int n = 0; n < length && bit[n] != -1; ++n) {
            float* out = Out + node[n] * M;
            for (int k = 0; k < T; ++k) {
                float* in = in_offset(In, x, k, M, T);
                float dot = 0;
                for (int i = 0; i < M; ++i) dot += in[i] * out[i];
                /* bit 0 -> +dot, bit 1 -> -dot */
                z[k] += logsigmoid(dot * (1 - 2*bit[n]));
            }
        }
    }
    /* Normalize: softmax over z, shifting by the max for stability. */
    double peak = z[0];
    for (int k = 1; k < T; ++k) {
        if (z[k] > peak) peak = z[k];
    }
    double total = 0.;
    for (int k = 0; k < T; ++k) {
        const double e = exp(z[k] - peak);
        z[k] = e;
        total += e;
    }
    for (int k = 0; k < T; ++k) {
        z[k] /= total;
    }
}
/* One SGD step for word `x` against `context`, given the sense posterior z
 * (as produced by update_z).
 *
 * For each context word's Huffman path, accumulates hierarchical-softmax
 * gradients: output-node vectors are updated immediately after each node
 * (staged in the M-float scratch `out_grad`), while input-sense gradients
 * are buffered in the T*M scratch `in_grad` and applied once per context
 * word.  Senses whose posterior z[k] is below sense_threshold are skipped
 * entirely.  Returns the z-weighted log-likelihood accumulated over the
 * step.
 *
 * lr               learning rate
 * in_grad/out_grad caller-provided scratch buffers (T*M and M floats)
 */
float inplace_update(
        float* In, float* Out, int M, int T, double* z,
        int32_t x, int32_t* context, int context_length,
        int32_t* paths, int8_t* codes, int64_t length,
        float* in_grad, float* out_grad,
        float lr, float sense_threshold) {
    float loglik = 0;
    for (int ci = 0; ci < context_length; ++ci) {
        const int32_t word = context[ci];
        int32_t* node = paths + word * length;  /* inner-node ids */
        int8_t* bit = codes + word * length;    /* Huffman bits   */
        /* Clear the per-sense input-gradient scratch (T x M). */
        for (int k = 0; k < T; ++k) {
            for (int i = 0; i < M; ++i) {
                in_grad[k*M + i] = 0;
            }
        }
        for (int n = 0; n < length && bit[n] != -1; ++n) {
            float* out = Out + node[n]*M;
            for (int i = 0; i < M; ++i) out_grad[i] = 0;
            for (int k = 0; k < T; ++k) {
                if (z[k] < sense_threshold) continue;  /* prune weak senses */
                float* in = in_offset(In, x, k, M, T);
                float dot = 0;
                for (int i = 0; i < M; ++i) dot += in[i] * out[i];
                loglik += z[k] * logsigmoid(dot * (1 - 2*bit[n]));
                /* Gradient of the binary log-loss at this tree node,
                 * weighted by the sense posterior and the learning rate. */
                const float d = 1 - bit[n] - sigmoid(dot);
                const float g = z[k] * lr * d;
                for (int i = 0; i < M; ++i) {
                    in_grad[k*M + i] += g * out[i];
                    out_grad[i] += g * in[i];
                }
            }
            /* Node vector is shared across senses: apply after the k loop. */
            for (int i = 0; i < M; ++i) out[i] += out_grad[i];
        }
        /* Apply the buffered input-vector updates for this context word. */
        for (int k = 0; k < T; ++k) {
            if (z[k] < sense_threshold) continue;
            float* in = in_offset(In, x, k, M, T);
            for (int i = 0; i < M; ++i) in[i] += in_grad[k*M + i];
        }
    }
    return loglik;
}
// Naive reference convolution used to verify optimized ConvolutionLayer
// implementations: accumulates in (*) weights[0] into `out`, then adds the
// bias weights[1] if conv_param->bias_term().
// Supports 4-axis (N,C,H,W) and 5-axis (N,C,D,H,W) blobs; when a depth axis
// is present, the depth kernel/stride/pad reuse the h values, so only cubic
// 3-D kernels are expressible here.
// NOTE(review): the inner loop only does `+=` into out_data — assumes `out`
// was zero-initialized by the caller; confirm at call sites.
void caffe_conv(const Blob<Dtype>* in, ConvolutionParameter* conv_param,
    const vector<shared_ptr<Blob<Dtype> > >& weights, Blob<Dtype>* out) {
  const bool has_depth = (out->num_axes() == 5);
  if (!has_depth) { CHECK_EQ(4, out->num_axes()); }
  // Kernel size, stride, and pad: per-dimension h/w fields win over the
  // repeated single-value fields; pad defaults to 0, stride to 1.
  int kernel_h, kernel_w;
  if (conv_param->has_kernel_h() || conv_param->has_kernel_w()) {
    kernel_h = conv_param->kernel_h();
    kernel_w = conv_param->kernel_w();
  } else {
    kernel_h = kernel_w = conv_param->kernel_size(0);
  }
  int pad_h, pad_w;
  if (conv_param->has_pad_h() || conv_param->has_pad_w()) {
    pad_h = conv_param->pad_h();
    pad_w = conv_param->pad_w();
  } else {
    pad_h = pad_w = conv_param->pad_size() ? conv_param->pad(0) : 0;
  }
  int stride_h, stride_w;
  if (conv_param->has_stride_h() || conv_param->has_stride_w()) {
    stride_h = conv_param->stride_h();
    stride_w = conv_param->stride_w();
  } else {
    stride_h = stride_w = conv_param->stride_size() ? conv_param->stride(0) : 1;
  }
  // Depth dimension mirrors the h parameters; degenerates to a single
  // depth step (kernel 1, stride 1, pad 0) for the 4-D case.
  int kernel_d, pad_d, stride_d;
  if (has_depth) {
    kernel_d = kernel_h;
    stride_d = stride_h;
    pad_d = pad_h;
  } else {
    kernel_d = stride_d = 1;
    pad_d = 0;
  }
  // Groups: channels are partitioned into `groups` independent convolutions;
  // o_g/k_g are output/input channels per group.
  int groups = conv_param->group();
  int o_g = out->shape(1) / groups;
  int k_g = in->shape(1) / groups;
  int o_head, k_head;
  // Convolution: index vectors gain one extra slot when a depth axis exists.
  vector<int> weight_offset(4 + has_depth);
  vector<int> in_offset(4 + has_depth);
  vector<int> out_offset(4 + has_depth);
  Dtype* out_data = out->mutable_cpu_data();
  // Loop order: batch n, group g, output channel o, input channel k,
  // output position (z, y, x), kernel position (r, p, q).
  for (int n = 0; n < out->shape(0); n++) {
    for (int g = 0; g < groups; g++) {
      o_head = o_g * g;
      k_head = k_g * g;
      for (int o = 0; o < o_g; o++) {
        for (int k = 0; k < k_g; k++) {
          for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) {
            for (int y = 0; y < out->shape(2 + has_depth); y++) {
              for (int x = 0; x < out->shape(3 + has_depth); x++) {
                for (int r = 0; r < kernel_d; r++) {
                  for (int p = 0; p < kernel_h; p++) {
                    for (int q = 0; q < kernel_w; q++) {
                      // Input coordinate hit by this kernel tap.
                      int in_z = z * stride_d - pad_d + r;
                      int in_y = y * stride_h - pad_h + p;
                      int in_x = x * stride_w - pad_w + q;
                      // Skip taps that fall into the zero padding.
                      if (in_z >= 0 && in_z < (has_depth ? in->shape(2) : 1)
                          && in_y >= 0 && in_y < in->shape(2 + has_depth)
                          && in_x >= 0 && in_x < in->shape(3 + has_depth)) {
                        weight_offset[0] = o + o_head;
                        weight_offset[1] = k;
                        if (has_depth) { weight_offset[2] = r; }
                        weight_offset[2 + has_depth] = p;
                        weight_offset[3 + has_depth] = q;
                        in_offset[0] = n;
                        in_offset[1] = k + k_head;
                        if (has_depth) { in_offset[2] = in_z; }
                        in_offset[2 + has_depth] = in_y;
                        in_offset[3 + has_depth] = in_x;
                        out_offset[0] = n;
                        out_offset[1] = o + o_head;
                        if (has_depth) { out_offset[2] = z; }
                        out_offset[2 + has_depth] = y;
                        out_offset[3 + has_depth] = x;
                        out_data[out->offset(out_offset)] +=
                            in->data_at(in_offset)
                            * weights[0]->data_at(weight_offset);
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  // Bias: one scalar per output channel, broadcast over all positions.
  if (conv_param->bias_term()) {
    const Dtype* bias_data = weights[1]->cpu_data();
    for (int n = 0; n < out->shape(0); n++) {
      for (int o = 0; o < out->shape(1); o++) {
        for (int z = 0; z < (has_depth ? out->shape(2) : 1); z++) {
          for (int y = 0; y < out->shape(2 + has_depth); y++) {
            for (int x = 0; x < out->shape(3 + has_depth); x++) {
              out_offset[0] = n;
              out_offset[1] = o;
              if (has_depth) { out_offset[2] = z; }
              out_offset[2 + has_depth] = y;
              out_offset[3 + has_depth] = x;
              out_data[out->offset(out_offset)] += bias_data[o];
            }
          }
        }
      }
    }
  }
}