/* Forward pass of a convolutional layer on the CPU.
 * Clears the output buffer, then runs one of two paths:
 *  - binary: GEMM against the pre-packed char filters (l.cfilters),
 *    followed by per-filter scaling, bias, and activation;
 *  - standard: im2col + GEMM per batch image, optional batch
 *    normalization, then bias and activation. */
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int b;

    /* The GEMMs below accumulate into l.output, so zero it first. */
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.binary){
        /* Binary path: filters are packed as signs in l.cfilters and the
         * magnitude is restored afterwards via scale_bias with l.scales. */
        int m = l.n;                /* rows: number of filters */
        int k = l.size*l.size*l.c;  /* shared dim: filter volume */
        int n = out_h*out_w;        /* cols: output spatial size */
        char  *weights = l.cfilters;
        float *col     = state.workspace;
        float *out     = l.output;

        for(b = 0; b < l.batch; ++b){
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, col);
            gemm_bin(m, n, k, 1, weights, k, col, n, out, n);
            out += n*m;                  /* next image's output slab */
            state.input += l.c*l.h*l.w;  /* next image in the batch */
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
        activate_array(l.output, m*n*l.batch, l.activation);
        return;
    }

    /* Standard float path. */
    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;
    float *weights = l.filters;
    float *col     = state.workspace;
    float *out     = l.output;

    for(b = 0; b < l.batch; ++b){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, col);
        gemm(0, 0, m, n, k, 1, weights, k, col, n, 1, out, n);
        out += n*m;
        state.input += l.c*l.h*l.w;
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
    activate_array(l.output, m*n*l.batch, l.activation);
}
/* Forward pass of batch normalization.
 * For a standalone BATCHNORM layer the input is first copied into
 * l.output; fused conv/connected layers already hold their
 * pre-activations there. During training the batch statistics are
 * computed, folded into the rolling averages (EMA with factor 0.01),
 * and used to normalize; at inference the rolling statistics are used
 * instead. Finally the learned scale and bias are applied. */
void forward_batchnorm_layer(layer l, network_state state)
{
    if(l.type == BATCHNORM) copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
    if(l.type == CONNECTED){
        /* Treat each output neuron of a connected layer as a 1x1 channel. */
        l.out_c = l.outputs;
        l.out_h = l.out_w = 1;
    }

    int spatial = l.out_h*l.out_w;

    if(state.train){
        mean_cpu(l.output, l.batch, l.out_c, spatial, l.mean);
        variance_cpu(l.output, l.mean, l.batch, l.out_c, spatial, l.variance);

        /* rolling = 0.99*rolling + 0.01*batch statistic */
        scal_cpu(l.out_c, .99, l.rolling_mean, 1);
        axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(l.out_c, .99, l.rolling_variance, 1);
        axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);

        /* Save pre-normalization and normalized activations for backward. */
        copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, spatial);
        copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
    } else {
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, spatial);
    }

    scale_bias(l.output, l.scales, l.batch, l.out_c, spatial);
    add_bias(l.output, l.biases, l.batch, l.out_c, spatial);
}
/* Forward pass of a deconvolutional (transposed convolution) layer.
 * For each image: columns = weights^T * input (GEMM with A transposed),
 * then col2im scatters the columns into the output image. Afterwards
 * either batch normalization or a plain bias add is applied, followed
 * by the activation function. */
void forward_deconvolutional_layer(const layer l, network_state state)
{
    int b;
    int out_h = l.out_h;
    int out_w = l.out_w;
    int size  = out_h*out_w;

    /* GEMM dims: (size^2 * n_filters) x (input spatial), shared dim = channels. */
    int m = l.size*l.size*l.n;
    int n = l.h*l.w;
    int k = l.c;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(b = 0; b < l.batch; ++b){
        float *weights = l.weights;
        float *im      = state.input + b*l.c*l.h*l.w;
        float *cols    = state.workspace;
        gemm(1, 0, m, n, k, 1, weights, m, im, n, 0, cols, n);
        /* NOTE(review): padding is hard-coded to 0 here instead of l.pad —
         * confirm this matches how out_h/out_w were computed for this layer. */
        col2im_cpu(cols, l.n, out_h, out_w, l.size, l.stride, 0, l.output + b*l.n*size);
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, state);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }
    activate_array(l.output, l.batch*l.n*size, l.activation);
}
/* Forward pass of a deconvolutional (transposed convolution) layer.
 * Per batch image: a transposed GEMM turns the input feature map into
 * column form (weights^T * input), col2im scatters the columns into the
 * output, then batch normalization or a bias add is applied before the
 * activation. */
void forward_deconvolutional_layer(const layer l, network net)
{
    int b;

    /* GEMM dims: (ksize^2 * n_filters) x (h*w), shared dim = input channels. */
    int m = l.size * l.size * l.n;
    int n = l.h * l.w;
    int k = l.c;

    fill_cpu(l.outputs * l.batch, 0, l.output, 1);

    for (b = 0; b < l.batch; ++b) {
        real_t *weights = l.weights;
        real_t *im      = net.input + b * l.c * l.h * l.w;
        real_t *cols    = net.workspace;
        gemm_cpu(1, 0, m, n, k, 1, weights, m, im, n, 0, cols, n);
        col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride,
                   l.pad, l.output + b * l.outputs);
    }

    if (l.batch_normalize) {
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
    }
    activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
}
/// Convert an unbiased float representation into a biased one:
/// add the exponent bias, strip the (explicit) hidden bit from the
/// fraction, and force the exponent to zero for denormals (which
/// includes zero itself, signalled by a clear hidden bit).
float_utilst::biased_floatt float_utilst::bias(const unbiased_floatt &src)
{
  biased_floatt result;

  result.sign=src.sign;
  result.NaN=src.NaN;
  result.infinity=src.infinity;

  // re-bias the exponent
  result.exponent=add_bias(src.exponent);

  // the incoming fraction carries the hidden bit explicitly as its MSB
  assert(src.fraction.size()==spec.f+1);
  const literalt hidden_bit=src.fraction.back();
  const literalt denormal=!hidden_bit;

  // drop the hidden bit
  result.fraction=src.fraction;
  result.fraction.resize(spec.f);

  // denormal (and zero) values are stored with an all-zero exponent
  for(auto &exponent_bit : result.exponent)
    exponent_bit=prop.land(exponent_bit, !denormal);

  return result;
}
/* Forward pass of a (possibly grouped) convolutional layer on the CPU.
 * Optionally binarizes weights and input for the XNOR path, then runs
 * im2col + GEMM per (image, group) pair, followed by batch
 * normalization or a bias add, and the activation. The binary-weight
 * swap is undone at the end so the layer is left unchanged. */
void forward_convolutional_layer(convolutional_layer l, network net)
{
    int b, g;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        /* XNOR path: replace weights and input by their binarized versions. */
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    /* GEMM dims per group: filters-per-group x output pixels,
     * shared dim = per-group filter volume. */
    int m = l.n/l.groups;
    int k = l.size*l.size*l.c/l.groups;
    int n = l.out_w*l.out_h;

    for(b = 0; b < l.batch; ++b){
        for(g = 0; g < l.groups; ++g){
            float *weights = l.weights + g*l.nweights/l.groups;
            float *col     = net.workspace;
            float *out     = l.output + (b*l.groups + g)*n*m;
            im2col_cpu(net.input + (b*l.groups + g)*l.c/l.groups*l.h*l.w,
                       l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad, col);
            gemm(0, 0, m, n, k, 1, weights, k, col, n, 1, out, n);
        }
    }

    if(l.batch_normalize){
        forward_batchnorm_layer(l, net);
    } else {
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);

    /* Restore the original float weights if they were swapped out. */
    if(l.binary || l.xnor) swap_binary(&l);
}
/* Forward pass of a convolutional layer (CPU reference version).
 * Zeroes the output, lowers each input image into columns with im2col,
 * multiplies by the filter matrix with one GEMM per image, then applies
 * optional batch normalization, the per-filter bias, and the activation. */
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);
    int i;

    /* The GEMM below accumulates (beta=1) into l.output, so clear it first. */
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    /* GEMM dims: filters (m) x output pixels (n), shared dim = filter volume (k). */
    int m = l.n;
    int k = l.size*l.size*l.c;
    int n = out_h*out_w;
    float *a = l.filters;
    float *b = l.col_image;  /* im2col scratch buffer */
    float *c = l.output;
    // printf("the l.size is %i \n", l.size);
    ///*
    //printf("the m,k,n is %i,%i,%i \n", m,k,n);
    for(i = 0; i < l.batch; ++i){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, b);
        gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
        c += n*m;                    /* advance to the next image's output slab */
        state.input += l.c*l.h*l.w;  /* advance to the next input image */
    }
    //*/
    /* Disabled experiment: direct (non-im2col) convolution written out as
     * nested loops over filters/pixels/kernel, kept for reference. */
    //add by fanghao
    /*
    int ii,jj,kk,mm,pp,tt;
    int lcc = l.c;
    int lhh = l.h;
    int lww = l.w;
    int kernel = l.size;
    int pad;
    if(l.pad)
        pad = l.size/2;
    else
        pad = l.pad;
    lhh += 2*pad;
    lww += 2*pad;
    float *dataP;
    dataP = (float *)calloc(lcc*lhh*lww, sizeof(float));
    //printf("the l.h is %i \n", l.h);
    //printf("the l.w is %i \n", l.w);
    //printf("the lhh is %i \n", lhh);
    //printf("the lww is %i \n", lww);
    //printf("the pad is %i \n", pad);
    for(ii=0; ii < lcc; ii++)
        for(jj=pad; jj<lhh-pad; jj++)
            for(kk=pad; kk<lww-pad; kk++)
                dataP[ii*lhh*lww + jj*lww + kk] = state.input[ii*(lhh - 2*pad)*(lww-2*pad) + (jj - pad)*(lww - 2*pad) + kk-pad];
    for(ii=0; ii<m; ii++)
        for(jj=0; jj<out_h; jj++)
            for(kk=0; kk<out_w; kk++) {
                float tempAcc = 0.0;
                for(mm=0; mm<lcc; mm++)
                    for(pp=0; pp<kernel; pp++)
                        for(tt=0; tt<kernel; tt++)
                            tempAcc += a[ii*lcc*kernel*kernel+mm*kernel*kernel+pp*kernel+tt]*dataP[mm*lhh*lww+(l.stride*jj+pp)*lww+l.stride*kk+tt];
                c[ii*out_h*out_w+jj*out_w+kk] = tempAcc;
            }
    // c += n*m;
    //state.input += l.c*l.h*l.w;
    // */
    if(l.batch_normalize){
        if(state.train){
            /* Training: normalize with this batch's statistics.
             * NOTE(review): rolling_mean/rolling_variance are NOT updated
             * here, unlike other batchnorm paths in this file — confirm
             * that is intended for this variant. */
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
        } else {
            /* Inference: normalize with the stored rolling statistics. */
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }
    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
    activate_array(l.output, m*n*l.batch, l.activation);
}
/* Runs inside-outside (EM) re-estimation on grammar g until convergence.
 *
 * Each iteration: fold the expected rule counts from the previous E-step
 * into new rule weights (optionally with a Variational-Bayes update and
 * a Dirichlet-style bias via add_bias), prune low-probability rules,
 * optionally jitter and anneal the weights, then run the E-step
 * (expected_rule_counts) to get new counts and the negative log
 * likelihood. Stops when the rule set is stable, at least minits
 * iterations have run, and either maxits is reached or the relative
 * likelihood improvement falls below stoptol.
 *
 * Progress is written to summaryfp/tracefp depending on debuglevel.
 * Returns the final bits-per-token value:
 *   sum_neglog_prob / (log(2) * sum_yieldweights).
 */
FLOAT inside_outside(grammar g, const si_t si, FILE *yieldfp, FILE *tracefp, FILE *summaryfp, int debuglevel, int maxsentlen, int minits, int maxits, FLOAT stoptol, FLOAT minruleprob, FLOAT jitter, int VariationalBayes, FLOAT wordscale, FLOAT annealstart, FLOAT annealstop, int nanneal, int weighted_yields_flag)
{
  FLOAT *rule_counts = CALLOC(g->nrules, sizeof(FLOAT));  /* expected count per rule */
  FLOAT sum_neglog_prob0;  /* -log P of the previous iteration */
  FLOAT sum_neglog_prob;   /* -log P of the current iteration */
  int iteration = 0;
  size_t nrules, nrules0;  /* rule counts after/before pruning */
  FLOAT sum_yieldweights;  /* total token weight, for bits/token reporting */
  FLOAT temperature = annealstart;

  nrules = g->nrules;

  /* Report header and iteration-0 stats. */
  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel < 5000)
      fprintf(summaryfp, "# Iteration\ttemperature\tnrules\t-logP\tbits/token\n%d\t%g\t%d", iteration, temperature, (int) nrules);
    else
      fprintf(summaryfp, "# Iteration %d, temperature = %g, %d rules, ", iteration, temperature, (int) nrules);
    fflush(summaryfp);
  }

  /* Initial E-step with the starting grammar. */
  sum_neglog_prob0 = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxsentlen, minruleprob, wordscale, rule_counts, &sum_yieldweights, weighted_yields_flag);

  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel < 5000)
      fprintf(summaryfp, "\t%g\t%g\n", sum_neglog_prob0, sum_neglog_prob0/(log(2)*(sum_yieldweights)));
    else
      fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", sum_neglog_prob0, sum_neglog_prob0/(log(2)*(sum_yieldweights)));
    fflush(summaryfp);
  }
  if (tracefp && debuglevel >= 10000) {
    write_rule_values(tracefp, g, si, rule_counts, 0);
    fprintf(tracefp, "\n");
    fflush(tracefp);
  }
  if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
    write_grammar(summaryfp, g, si, minruleprob);

  while (1) {
    ++iteration;

    /* M-step: counts -> weights, then prune and optionally perturb. */
    add_bias(g, rule_counts);
    set_rule_weights(g, rule_counts, VariationalBayes);
    prune_grammar(g, si, minruleprob);
    if (jitter != 0)
      jitter_weights(g, jitter);
    set_rule_weights(g, g->weights, 0);

    /* Annealing: geometric temperature schedule for the first nanneal
     * iterations, then fixed at 1. */
    if (iteration < nanneal) {
      temperature = annealstart*pow(annealstop/annealstart, (iteration-1.0)/(nanneal-1.0));
      scale_weights(g, 1.0/temperature);
    }
    else
      temperature = 1.0;

    nrules0 = nrules;
    nrules = g->nrules;

    if (summaryfp && debuglevel >= 1000) {
      if (debuglevel < 5000)
        fprintf(summaryfp, "%d\t%g\t%d", iteration, temperature, (int) nrules);
      else
        fprintf(summaryfp, "# Iteration %d, temperature %g, %d rules, ", iteration, temperature, (int) nrules);
      fflush(summaryfp);
    }

    /* E-step with the updated grammar. */
    sum_neglog_prob = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel, maxsentlen, minruleprob, wordscale, rule_counts, &sum_yieldweights, weighted_yields_flag);

    if (summaryfp && debuglevel >= 1000) {
      if (debuglevel < 5000)
        fprintf(summaryfp, "\t%g\t%g\n", sum_neglog_prob, sum_neglog_prob/(log(2)*(sum_yieldweights)));
      else
        fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", sum_neglog_prob, sum_neglog_prob/(log(2)*(sum_yieldweights)));
      fflush(summaryfp);
    }
    if (tracefp && debuglevel >= 10000) {
      write_rule_values(tracefp, g, si, rule_counts, 0);
      fprintf(tracefp, "\n");
      fflush(tracefp);
    }
    if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
      write_grammar(summaryfp, g, si, minruleprob);

    /* Converged: stable rule set, minimum iterations done, and either the
     * iteration cap was hit or the relative improvement is below stoptol. */
    if (nrules==nrules0 && iteration >= minits && ((maxits > 0 && iteration >= maxits) || (sum_neglog_prob0-sum_neglog_prob)/fabs(sum_neglog_prob) < stoptol))
      break;

    sum_neglog_prob0 = sum_neglog_prob;
  }

  FREE(rule_counts);
  return(sum_neglog_prob/(log(2)*sum_yieldweights));
}
// Deconvolution forward pass: for each image in the batch, compute
// columns = weight^T * input with a transposed GEMM, scatter the column
// buffer into the output image with col2im, then add the channel bias.
bool Run(Node *node)
{
    // input tensor
    const Tensor *input_tensor = node->GetInputTensor(0);
    float *input = (float *)get_tensor_mem(input_tensor);
    const TShape &in_shape = input_tensor->GetShape();
    const std::vector<int> in_dims = in_shape.GetDim();

    // output tensor
    Tensor *output_tensor = node->GetOutputTensor(0);
    float *output = (float *)get_tensor_mem(output_tensor);
    const TShape &out_shape = output_tensor->GetShape();
    const std::vector<int> out_dims = out_shape.GetDim();

    // weight and bias tensors
    const Tensor *weight_tensor = node->GetInputTensor(1);
    float *weight = (float *)get_tensor_mem(weight_tensor);
    const Tensor *bias_tensor = node->GetInputTensor(2);
    float *bias = (float *)get_tensor_mem(bias_tensor);

    // deconvolution parameters
    Deconvolution *deconv_op = dynamic_cast<Deconvolution *>(node->GetOp());
    DeconvParam *param_ = deconv_op->GetParam();
    const int pad = param_->pad;
    const int stride = param_->stride;
    const int ksize = param_->kernel_size;
    const int dilation = param_->dilation;

    // scratch buffer holding the GEMM result before col2im
    float *buffer = any_cast<float *>(node->GetAttr("buffer"));

    // shapes (NCHW)
    const int batch = in_dims[0];
    const int c_in = in_dims[1];
    const int h_in = in_dims[2];
    const int w_in = in_dims[3];
    const int c_out = out_dims[1];
    const int h_out = out_dims[2];
    const int w_out = out_dims[3];
    const int in_image_size = c_in * h_in * w_in;
    const int hw_out = h_out * w_out;
    const int out_image_size = c_out * hw_out;

    // col2im accumulates into the output, so clear it first
    memset(output, 0, out_dims[0] * out_image_size * sizeof(float));

    // GEMM dims: (ksize^2 * c_out) x (h_in*w_in), shared dim = c_in
    const int m = ksize * ksize * c_out;
    const int n = h_in * w_in;
    const int k = c_in;

    for (int b = 0; b < batch; ++b)
    {
        float *image = input + b * in_image_size;
        float *out_ptr = output + b * out_image_size;
        cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans, m, n, k, 1,
                    weight, m, image, n, 0, buffer, n);
        col2im(buffer, out_ptr, c_out, h_out, w_out, ksize, stride, pad, dilation, h_in, w_in);
        add_bias(out_ptr, bias, c_out, hw_out);
    }
    return true;
}