Beispiel #1
0
/*
 * CPU forward pass of a convolutional layer.
 *
 * l.output is cleared first.  Then, for every item of the batch, the input
 * is unrolled by im2col_cpu into state.workspace and multiplied with the
 * layer's filters:
 *   - l.binary set:   the char filters in l.cfilters go through gemm_bin,
 *                     followed by scale_bias with l.scales;
 *   - otherwise:      the float filters in l.filters go through gemm,
 *                     followed by forward_batchnorm_layer when
 *                     l.batch_normalize is set.
 * Both paths end with add_bias and activate_array on l.output.
 *
 * `l` and `state` are received by value; state.input is advanced locally
 * while walking the batch.
 */
void forward_convolutional_layer(convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);

    const int filters  = l.n;                 /* rows of the product      */
    const int patch    = l.size*l.size*l.c;   /* shared inner dimension   */
    const int spatial  = out_h*out_w;         /* columns of the product   */
    const int in_step  = l.c*l.h*l.w;         /* floats per input image   */
    const int out_step = spatial*filters;     /* floats per output image  */

    float *cols = state.workspace;
    float *dst  = l.output;
    int b;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(!l.binary){
        for(b = 0; b < l.batch; ++b){
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, cols);
            gemm(0,0,filters,spatial,patch,1,l.filters,patch,cols,spatial,1,dst,spatial);
            dst += out_step;
            state.input += in_step;
        }

        if(l.batch_normalize){
            forward_batchnorm_layer(l, state);
        }
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
    } else {
        for(b = 0; b < l.batch; ++b){
            im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, cols);
            gemm_bin(filters,spatial,patch,1,l.cfilters,patch,cols,spatial,dst,spatial);
            dst += out_step;
            state.input += in_step;
        }

        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
        add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
    }

    activate_array(l.output, filters*spatial*l.batch, l.activation);
}
Beispiel #2
0
/*
 * CPU forward pass of batch normalisation, operating in place on l.output.
 *
 * - A BATCHNORM layer first copies state.input into l.output.
 * - A CONNECTED layer is treated as l.outputs channels of size 1x1
 *   (the override only affects this function's by-value copy of `l`).
 * - When state.train is set, the batch mean and variance are computed into
 *   l.mean / l.variance, the rolling statistics are blended as
 *   0.99*rolling + 0.01*batch, and l.output is saved to l.x before and to
 *   l.x_norm after normalisation.
 * - Otherwise l.output is normalised with the rolling statistics.
 * Finally scale_bias (l.scales) and add_bias (l.biases) are applied.
 */
void forward_batchnorm_layer(layer l, network_state state)
{
    int total;
    int plane;

    if(l.type == BATCHNORM){
        copy_cpu(l.outputs*l.batch, state.input, 1, l.output, 1);
    }
    if(l.type == CONNECTED){
        l.out_c = l.outputs;
        l.out_w = 1;
        l.out_h = 1;
    }

    /* Computed only after the CONNECTED override above. */
    total = l.outputs*l.batch;
    plane = l.out_h*l.out_w;

    if(!state.train){
        normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.out_c, plane);
    } else {
        mean_cpu(l.output, l.batch, l.out_c, plane, l.mean);
        variance_cpu(l.output, l.mean, l.batch, l.out_c, plane, l.variance);

        /* rolling = .99*rolling + .01*current, for mean then variance */
        scal_cpu(l.out_c, .99, l.rolling_mean, 1);
        axpy_cpu(l.out_c, .01, l.mean, 1, l.rolling_mean, 1);
        scal_cpu(l.out_c, .99, l.rolling_variance, 1);
        axpy_cpu(l.out_c, .01, l.variance, 1, l.rolling_variance, 1);

        copy_cpu(total, l.output, 1, l.x, 1);
        normalize_cpu(l.output, l.mean, l.variance, l.batch, l.out_c, plane);
        copy_cpu(total, l.output, 1, l.x_norm, 1);
    }

    scale_bias(l.output, l.scales, l.batch, l.out_c, plane);
    add_bias(l.output, l.biases, l.batch, l.out_c, plane);
}
Beispiel #3
0
/*
 * CPU forward pass of a deconvolutional layer.
 *
 * l.output is cleared; then, per batch item, gemm multiplies l.weights
 * (first operand flagged as transposed) with that item's slice of
 * state.input into state.workspace, and col2im_cpu (called with pad 0)
 * folds the workspace into the item's slice of l.output.
 * Afterwards either forward_batchnorm_layer (when l.batch_normalize is set)
 * or add_bias is applied, followed by activate_array.
 */
void forward_deconvolutional_layer(const layer l, network_state state)
{
    const int out_area = l.out_h*l.out_w;
    const int rows  = l.size*l.size*l.n;
    const int cols  = l.h*l.w;
    const int depth = l.c;
    int b;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(b = 0; b < l.batch; ++b){
        float *src = state.input + b*l.c*l.h*l.w;
        float *dst = l.output + b*l.n*out_area;

        gemm(1,0,rows,cols,depth,1,l.weights,rows,src,cols,0,state.workspace,cols);
        col2im_cpu(state.workspace, l.n, l.out_h, l.out_w, l.size, l.stride, 0, dst);
    }

    if(!l.batch_normalize){
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    } else {
        forward_batchnorm_layer(l, state);
    }

    activate_array(l.output, l.batch*l.n*out_area, l.activation);
}
/*
 * CPU forward pass of a deconvolutional layer (real_t / network variant).
 *
 * Clears l.output, then for each batch item runs gemm_cpu on l.weights
 * (first operand flagged as transposed) and that item's slice of net.input,
 * writing into net.workspace, and folds the workspace into the item's
 * l.outputs-sized slice of l.output with col2im_cpu using l.pad.
 * Finishes with forward_batchnorm_layer or add_bias, then activate_array.
 */
void forward_deconvolutional_layer(const layer l, network net) {
	const int rows = l.size * l.size * l.n;
	const int cols = l.h * l.w;
	const int depth = l.c;
	int b;

	fill_cpu(l.outputs * l.batch, 0, l.output, 1);

	for (b = 0; b < l.batch; ++b) {
		real_t *src = net.input + b * l.c * l.h * l.w;
		real_t *dst = l.output + b * l.outputs;

		gemm_cpu(1, 0, rows, cols, depth, 1, l.weights, rows, src, cols, 0,
				net.workspace, cols);

		col2im_cpu(net.workspace, l.out_c, l.out_h, l.out_w, l.size, l.stride,
				l.pad, dst);
	}

	if (!l.batch_normalize) {
		add_bias(l.output, l.biases, l.batch, l.n, l.out_w * l.out_h);
	} else {
		forward_batchnorm_layer(l, net);
	}

	activate_array(l.output, l.batch * l.n * l.out_w * l.out_h, l.activation);
}
Beispiel #5
0
/// Builds the biased representation of an unbiased float.
///
/// Sign, NaN and infinity are copied over, the exponent is passed through
/// add_bias, and the top (hidden) bit of the fraction is dropped so that
/// spec.f fraction bits remain. When the hidden bit is not set (denormal,
/// which includes zero) every exponent bit is forced to false.
///
/// \param src: unbiased float whose fraction has spec.f+1 bits
/// \return the biased float
float_utilst::biased_floatt float_utilst::bias(const unbiased_floatt &src)
{
  biased_floatt biased;

  biased.sign=src.sign;
  biased.NaN=src.NaN;
  biased.infinity=src.infinity;

  // the exponent has to be stored with the bias added
  biased.exponent=add_bias(src.exponent);

  // the incoming fraction must still carry the hidden bit
  assert(src.fraction.size()==spec.f+1);

  const literalt top_bit=src.fraction.back();
  const literalt denormal=!top_bit;
  const literalt keep_exponent=!denormal;

  // drop the hidden bit
  biased.fraction=src.fraction;
  biased.fraction.resize(spec.f);

  // denormals (and zero) get an all-zero exponent
  for(auto &bit : biased.exponent)
    bit=prop.land(bit, keep_exponent);

  return biased;
}
/*
 * CPU forward pass of a grouped convolutional layer.
 *
 * l.output is cleared.  When l.xnor is set, the weights are binarised into
 * l.binary_weights and swapped in with swap_binary, and the input is
 * binarised into l.binary_input, which then replaces net.input for the
 * rest of this call.
 *
 * For every batch item and every group, the group's input slice is
 * unrolled with im2col_cpu into net.workspace and multiplied by the group's
 * weight slice with gemm, accumulating into the matching slice of l.output.
 * The result then goes through forward_batchnorm_layer or add_bias, and
 * activate_array.  swap_binary is called once more at the end when
 * l.binary or l.xnor is set.
 */
void forward_convolutional_layer(convolutional_layer l, network net)
{
    int b, g;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    if(l.xnor){
        binarize_weights(l.weights, l.n, l.c/l.groups*l.size*l.size, l.binary_weights);
        swap_binary(&l);
        binarize_cpu(net.input, l.c*l.h*l.w*l.batch, l.binary_input);
        net.input = l.binary_input;
    }

    const int group_filters = l.n/l.groups;
    const int patch         = l.size*l.size*l.c/l.groups;
    const int spatial       = l.out_w*l.out_h;

    for(b = 0; b < l.batch; ++b){
        for(g = 0; g < l.groups; ++g){
            /* index of this (batch item, group) pair */
            const int slot = b*l.groups + g;

            float *group_weights = l.weights + g*l.nweights/l.groups;
            float *src = net.input + slot*l.c/l.groups*l.h*l.w;
            float *dst = l.output + slot*spatial*group_filters;

            im2col_cpu(src, l.c/l.groups, l.h, l.w, l.size, l.stride, l.pad,
                net.workspace);
            gemm(0,0,group_filters,spatial,patch,1,group_weights,patch,
                net.workspace,spatial,1,dst,spatial);
        }
    }

    if(!l.batch_normalize){
        add_bias(l.output, l.biases, l.batch, l.n, l.out_h*l.out_w);
    } else {
        forward_batchnorm_layer(l, net);
    }

    activate_array(l.output, l.outputs*l.batch, l.activation);

    if(l.binary || l.xnor){
        swap_binary(&l);
    }
}
/*
 * CPU forward pass of a convolutional layer that keeps its im2col buffer
 * in l.col_image.
 *
 * l.output is cleared; each batch item is unrolled with im2col_cpu into
 * l.col_image and multiplied with l.filters by gemm, accumulating into the
 * item's slice of l.output.  When l.batch_normalize is set the output is
 * normalised -- with freshly computed batch statistics if state.train is
 * set, with the rolling statistics otherwise -- and scaled by l.scales.
 * add_bias and activate_array complete the pass.
 *
 * The im2col + gemm loop is the only active convolution path here.
 */
void forward_convolutional_layer(const convolutional_layer l, network_state state)
{
    int out_h = convolutional_out_height(l);
    int out_w = convolutional_out_width(l);

    const int filters  = l.n;
    const int patch    = l.size*l.size*l.c;
    const int spatial  = out_h*out_w;
    const int in_step  = l.c*l.h*l.w;
    const int out_step = spatial*filters;

    float *dst = l.output;
    int b;

    fill_cpu(l.outputs*l.batch, 0, l.output, 1);

    for(b = 0; b < l.batch; ++b){
        im2col_cpu(state.input, l.c, l.h, l.w, l.size, l.stride, l.pad, l.col_image);
        gemm(0,0,filters,spatial,patch,1,l.filters,patch,l.col_image,spatial,1,dst,spatial);
        dst += out_step;
        state.input += in_step;
    }

    if(l.batch_normalize){
        if(!state.train){
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.n, l.out_h*l.out_w);
        } else {
            mean_cpu(l.output, l.batch, l.n, l.out_h*l.out_w, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.n, l.out_h*l.out_w, l.variance);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.n, l.out_h*l.out_w);
        }
        scale_bias(l.output, l.scales, l.batch, l.n, out_h*out_w);
    }

    add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);

    activate_array(l.output, filters*spatial*l.batch, l.activation);
}
Beispiel #8
0
/* Reports the outcome of one expected_rule_counts pass.
 *
 * Depending on debuglevel this writes the negative log probability and the
 * bits-per-token figure to summaryfp, dumps the rule counts to tracefp, and
 * writes the grammar to summaryfp.
 */
static void
inside_outside_report_pass(grammar g, const si_t si,
                           FILE *tracefp, FILE *summaryfp, int debuglevel,
                           FLOAT minruleprob, FLOAT *rule_counts,
                           FLOAT neglog_prob, FLOAT yieldweights)
{
  if (summaryfp && debuglevel >= 1000) {
    if (debuglevel >= 5000)
      fprintf(summaryfp, "-logP = %g, bits/token = %g.\n", neglog_prob,
              neglog_prob/(log(2)*(yieldweights)));
    else
      fprintf(summaryfp, "\t%g\t%g\n", neglog_prob,
              neglog_prob/(log(2)*(yieldweights)));
    fflush(summaryfp);
  }

  if (tracefp && debuglevel >= 10000) {
    write_rule_values(tracefp, g, si, rule_counts, 0);
    fprintf(tracefp, "\n");
    fflush(tracefp);
  }

  if (summaryfp && debuglevel >= 5000 && debuglevel < 10000)
    write_grammar(summaryfp, g, si, minruleprob);
}

/* Iteratively re-estimates the rule weights of grammar g.
 *
 * An initial expected_rule_counts pass is followed by a loop in which the
 * counts are biased (add_bias), turned into weights (set_rule_weights),
 * the grammar is pruned, optionally jittered, re-weighted, and -- while
 * iteration < nanneal -- scaled by 1/temperature, where the temperature
 * moves geometrically from annealstart towards annealstop.  Each iteration
 * ends with another expected_rule_counts pass.
 *
 * The loop stops once the rule count did not change in the last iteration,
 * at least minits iterations have run, and either maxits (if positive) has
 * been reached or the relative decrease of the negative log probability
 * fell below stoptol.
 *
 * Progress is reported to summaryfp / tracefp according to debuglevel.
 *
 * Returns the final negative log probability divided by
 * log(2)*sum of yield weights.
 */
FLOAT
inside_outside(grammar g, const si_t si, FILE *yieldfp, 
	       FILE *tracefp, FILE *summaryfp, int debuglevel,
	       int maxsentlen, int minits, int maxits,
	       FLOAT stoptol, FLOAT minruleprob,
	       FLOAT jitter, int VariationalBayes, FLOAT wordscale,
	       FLOAT annealstart, FLOAT annealstop, int nanneal,
	       int weighted_yields_flag)
{
  FLOAT *counts = CALLOC(g->nrules, sizeof(FLOAT));
  FLOAT prev_neglog;
  FLOAT cur_neglog;
  FLOAT yieldweights;
  FLOAT temperature = annealstart;
  size_t nrules_now = g->nrules;
  size_t nrules_before;
  int   iteration = 0;
  const int want_summary = summaryfp && debuglevel >= 1000;

  /* Header for the initial (iteration 0) pass. */
  if (want_summary) {
    if (debuglevel >= 5000)
      fprintf(summaryfp, "# Iteration %d, temperature = %g, %d rules, ",
              iteration, temperature, (int) nrules_now);
    else
      fprintf(summaryfp, "# Iteration\ttemperature\tnrules\t-logP\tbits/token\n%d\t%g\t%d", 
              iteration, temperature, (int) nrules_now);
    fflush(summaryfp);
  }

  prev_neglog = expected_rule_counts(g, si, yieldfp, tracefp,
                                     summaryfp, debuglevel,
                                     maxsentlen, minruleprob, wordscale,
                                     counts, &yieldweights,
                                     weighted_yields_flag);

  inside_outside_report_pass(g, si, tracefp, summaryfp, debuglevel,
                             minruleprob, counts, prev_neglog, yieldweights);

  for (iteration = 1; ; ++iteration) {
    /* Turn the expected counts into the next set of rule weights. */
    add_bias(g, counts);
    set_rule_weights(g, counts, VariationalBayes);
    prune_grammar(g, si, minruleprob);
    if (jitter != 0)
      jitter_weights(g, jitter);
    set_rule_weights(g, g->weights, 0);

    if (iteration >= nanneal)
      temperature = 1.0;
    else {
      temperature = annealstart*pow(annealstop/annealstart, (iteration-1.0)/(nanneal-1.0));
      scale_weights(g, 1.0/temperature);
    }

    nrules_before = nrules_now;
    nrules_now = g->nrules;

    if (want_summary) {
      if (debuglevel >= 5000)
        fprintf(summaryfp, "# Iteration %d, temperature %g, %d rules, ",
                iteration, temperature, (int) nrules_now);
      else
        fprintf(summaryfp, "%d\t%g\t%d", iteration, temperature, (int) nrules_now);
      fflush(summaryfp);
    }

    cur_neglog = expected_rule_counts(g, si, yieldfp, tracefp, summaryfp, debuglevel,
                                      maxsentlen, minruleprob, wordscale,
                                      counts, &yieldweights, weighted_yields_flag);

    inside_outside_report_pass(g, si, tracefp, summaryfp, debuglevel,
                               minruleprob, counts, cur_neglog, yieldweights);

    /* Stop test: only considered when the grammar size is stable and the
       minimum number of iterations has been reached. */
    if (nrules_now == nrules_before && iteration >= minits) {
      if (maxits > 0 && iteration >= maxits)
        break;
      if ((prev_neglog-cur_neglog)/fabs(cur_neglog) < stoptol)
        break;
    }

    prev_neglog = cur_neglog;
  }

  FREE(counts);

  return(cur_neglog/(log(2)*yieldweights));
}
Beispiel #9
0
    /// Runs the deconvolution for one node.
    ///
    /// Reads input tensor 0 (data), 1 (weight) and 2 (bias), the operator's
    /// pad / stride / kernel_size / dilation parameters and the float scratch
    /// area stored in the node's "buffer" attribute. The output tensor is
    /// zeroed; then, for each batch item, cblas_sgemm multiplies the
    /// transposed weight with the item's input into the scratch area,
    /// col2im folds it into the item's output slice and add_bias adds the
    /// bias per output channel.
    ///
    /// Always returns true.
    bool Run(Node *node) //
    {
        // input tensor and its dimensions
        const Tensor *src_tensor = node->GetInputTensor(0);
        float *src_data = (float *)get_tensor_mem(src_tensor);
        const std::vector<int> src_dims = src_tensor->GetShape().GetDim();

        // output tensor and its dimensions
        Tensor *dst_tensor = node->GetOutputTensor(0);
        float *dst_data = (float *)get_tensor_mem(dst_tensor);
        const std::vector<int> dst_dims = dst_tensor->GetShape().GetDim();

        // weight and bias
        float *weight_data = (float *)get_tensor_mem(node->GetInputTensor(1));
        float *bias_data = (float *)get_tensor_mem(node->GetInputTensor(2));

        // operator parameters
        DeconvParam *param = dynamic_cast<Deconvolution *>(node->GetOp())->GetParam();
        const int pad = param->pad;
        const int stride = param->stride;
        const int kernel = param->kernel_size;
        const int dilation = param->dilation;

        // scratch area for the gemm result
        float *scratch = any_cast<float *>(node->GetAttr("buffer"));

        // geometry
        const int batch = src_dims[0];
        const int in_c = src_dims[1];
        const int in_h = src_dims[2];
        const int in_w = src_dims[3];
        const int in_chw = src_dims[1]*src_dims[2]*src_dims[3];

        const int out_c = dst_dims[1];
        const int out_h = dst_dims[2];
        const int out_w = dst_dims[3];
        const int out_hw = dst_dims[2]*dst_dims[3];
        const int out_chw = out_c * out_h * out_w;
        const int out_total = dst_dims[0]*out_chw;

        memset(dst_data,0,out_total*sizeof(float));

        // gemm dimensions
        const int rows = kernel* kernel * out_c;
        const int cols = in_h * in_w;
        const int depth = in_c;

        for(int item = 0; item < batch; ++item)
        {
            float *item_in = src_data + item*in_chw;
            float *item_out = dst_data + item*out_chw;

            cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                        rows, cols, depth, 1, weight_data, rows,
                        item_in, cols, 0, scratch, cols);

            col2im(scratch, item_out, out_c, out_h, out_w,
                   kernel, stride, pad, dilation, in_h, in_w);

            add_bias(item_out, bias_data, out_c, out_hw);
        }

        return true;
    }