bool ConvolutionLayer::init() {
    if(initialized) return true;
    if((layer_param_.blobs_size() < 1) ||
       (layer_param_.convolution_param().bias_term() && 
	layer_param_.blobs_size() < 2)) return false;

    LOG(INFO) << "Starting to initialize " << this->name() << endl;

    const BlobProto& weights_blob = layer_param_.blobs(0);
    LOG(INFO) << "CONV WEIGHTS: " << endl << weights_blob.width() << endl
	      << weights_blob.height() << endl << weights_blob.channels() << endl
	      << weights_blob.num() << endl;

    weights_host_ = new float[weights_blob.width()*weights_blob.height()*
			      weights_blob.channels()*weights_blob.num()];
    Buffer weights_buf(type_of<float>(), weights_blob.width(), weights_blob.height(),
		      weights_blob.channels(), weights_blob.num(), 
		      (uint8_t*) (weights_host_));
    for(int x = 0, x_end = weights_blob.width(); x < x_end; x++) {
      for(int y = 0, y_end = weights_blob.height(); y < y_end; y++) {
	for(int c = 0, c_end = weights_blob.channels(); c < c_end; c++) {
	  for(int n = 0, n_end = weights_blob.num(); n < n_end; n++) {
	    int index = x + y*weights_blob.width() + c*weights_blob.width()*
	      weights_blob.height() + n*weights_blob.width()*
	      weights_blob.height()*weights_blob.channels();
	    weights_host_[index] = weights_blob.data(index);
	  }
	}
      }
    }
    
    if(!weights_.defined())
      weights_ = ImageParam(weights_buf.type(), 4);
    weights_.set(weights_buf);

    if(layer_param_.convolution_param().bias_term()) {
      const BlobProto bias_blob = layer_param_.blobs(1);
      bias_host_ = new float[bias_blob.width()*bias_blob.height()*
			     bias_blob.channels()*bias_blob.num()];
      Buffer bias_buf(type_of<float>(), bias_blob.width(), bias_blob.height(),
		      bias_blob.channels(), bias_blob.num(),(uint8_t*) bias_host_);

      LOG(INFO) << "CONV BIAS" << endl << bias_blob.width() << endl
		<< bias_blob.height() << endl << bias_blob.channels() << endl 
		<< bias_blob.num() << endl; 
      int bias_size = bias_blob.width()*bias_blob.height()*bias_blob.channels()*
	bias_blob.num();
      for(int i = 0; i < bias_size; i++) {
	bias_host_[i] = bias_blob.data(i);
      }
      if(!bias_.defined())
	bias_ = ImageParam(bias_buf.type(), 4);
      bias_.set(bias_buf);
    }

    LOG(INFO) << "Completed initializing " << this->name() << endl;

    initialized = true;
    return initialized;
  }
 // Tears the layer down: any defined ImageParam is replaced by a
 // default-constructed one, then the host-side weight and bias arrays
 // are freed and their pointers cleared.
 ConvolutionLayer::~ConvolutionLayer() {
   if(weights_.defined()) {
     weights_ = ImageParam();
   }
   if(bias_.defined()) {
     bias_ = ImageParam();
   }

   // delete[] on a null pointer is a no-op, so no explicit guard is needed.
   delete[] weights_host_;
   weights_host_ = NULL;

   delete[] bias_host_;
   bias_host_ = NULL;
 }
// Example no. 3
// 0
// Builds the Halide pipeline of the gridder.
//
// The constructor
//   1. creates the inputs (scale, grid_size, vis, gcf_fused),
//   2. defines the output function uvg and helper functions that turn each
//      visibility's coordinates into a grid position and an oversampling
//      index,
//   3. adds an update to uvg over a 5-dimensional reduction domain,
//   4. constrains buffer layouts according to the bits set in cfg.dim, and
//   5. schedules the update according to cfg.upd.
//
// Fields of cfg read here: gcfSize, steps, cpos, xpos, ypos, vpos, blpos,
// dim, upd, vector.
//
// NOTE(review): F(...) and Q(...) are macros, and scale, grid_size, vis,
// gcf_fused, uvg, uv, overc, inBound, the Vars (cmplx, x, y, uvdim, t), the
// RVars (rcmplx, rgcfx, rgcfy, rstep, rbl) and the constants (OVER, _U, _V,
// _R, _I, _REAL, _IMAG, _CPLX_FIELDS, _VIS_FIELDS, ...) are all declared
// outside this view. Presumably the macros name a Func after its identifier
// and the rest are class members / header constants -- confirm.
SGridder::SGridder(SGridderConfig cfg) {
  // ** Input
  scale = Param<double>("scale");
  grid_size = Param<int>("grid_size");
  // Visibilities: 2-D buffer of doubles, indexed below as vis(field, index).
  vis = ImageParam(type_of<double>(), 2, "vis");

  // GCF: Array of OxOxSxS complex numbers. We "fuse" two dimensions
  // as Halide only supports up to 4 dimensions.
  gcf_fused = ImageParam(type_of<double>(), 4, "gcf");

  // ** Output

  // Grid starts out undefined so we can update the output buffer
  F(uvg);
  uvg(cmplx, x, y) = undef<double>();

  // Get grid limits. This limits the uv pixel coordinates we accept
  // for the top-left corner of the GCF.
  // Upper limits are min + extent - cfg.gcfSize - 1 of the output buffer's
  // dimensions 1 (u) and 2 (v).
  Expr min_u = uvg.output_buffer().min(1);
  Expr max_u = uvg.output_buffer().min(1) + uvg.output_buffer().extent(1) - cfg.gcfSize - 1;
  Expr min_v = uvg.output_buffer().min(2);
  Expr max_v = uvg.output_buffer().min(2) + uvg.output_buffer().extent(2) - cfg.gcfSize - 1;

  // ** Helpers

  // Coordinate preprocessing
  //  uvs:   visibility coordinate multiplied by scale.
  //  overc: fractional part of uvs times OVER, rounded to an int and clamped
  //         to [0, OVER-1].
  //  uv:    floor of uvs shifted by grid_size/2 - cfg.gcfSize/2, as an int.
  Func Q(uvs);
  F(uv), F(overc);
  uvs(uvdim, t) = vis(uvdim, t) * scale;
  overc(uvdim, t) = clamp(cast<int>(round(OVER * (uvs(uvdim, t) - floor(uvs(uvdim, t))))), 0, OVER-1);
  uv(uvdim, t) = cast<int>(floor(uvs(uvdim, t)) + grid_size / 2 - cfg.gcfSize / 2);

  // Visibilities to ignore due to being out of bounds
  // (true only when both uv coordinates lie within [min, max] computed above)
  F(inBound);
  inBound(t) = uv(_U, t) >= min_u && uv(_U, t) <= max_u &&
               uv(_V, t) >= min_v && uv(_V, t) <= max_v;

  // GCF lookup for a given visibility
  // The last index of gcf_fused combines both oversampling coordinates as
  // overc(_U) + OVER * overc(_V).
  // NOTE(review): overx and overy are not referenced anywhere else in this
  // constructor.
  Func Q(gcf);
  Var suppx("suppx"), suppy("suppy"), overx("overx"), overy("overy");
  gcf(suppx, suppy, t)
      = Complex(gcf_fused(_REAL, suppx, suppy, overc(_U, t) + OVER * overc(_V, t)),
                gcf_fused(_IMAG, suppx, suppy, overc(_U, t) + OVER * overc(_V, t)));

  // ** Definition

  // Reduction domain. Note that we iterate over time steps before
  // switching the GCF row in order to increase locality (Romein).
  // Each range is a (min, extent) pair; gRange is used for both GCF axes.
  typedef std::pair<Expr, Expr> rType;
  rType
      cRange = {0, _CPLX_FIELDS}
    , gRange = {0, cfg.gcfSize}
    , vRange = {0, cfg.steps}
    , blRange = {0, vis.height() / cfg.steps}
    ;

  // cfg.*pos select which of the five reduction dimensions each range
  // occupies.
  // NOTE(review): nothing here checks that the five positions are distinct
  // and within [0, 4]; a duplicate would leave a default-constructed entry.
  std::vector<rType> rVec(5);
  rVec[cfg.cpos] = cRange;
  rVec[cfg.xpos] = gRange;
  rVec[cfg.ypos] = gRange;
  rVec[cfg.vpos] = vRange;
  rVec[cfg.blpos] = blRange;

  // Bind the named reduction variables to the configured dimensions.
  RDom red(rVec);
    rcmplx = red[cfg.cpos]
  , rgcfx  = red[cfg.xpos]
  , rgcfy  = red[cfg.ypos]
  , rstep  = red[cfg.vpos]
  , rbl    = red[cfg.blpos]
  ;
  // Index of the current visibility: offset from vis.top() by
  // cfg.steps * rbl + rstep.
  Expr rvis = vis.top() + cfg.steps * rbl + rstep;

  // Get visibility as complex number
  Complex visC(vis(_R, rvis), vis(_I, rvis));

  // Update grid
  // The write position is clamped to [min, max], so out-of-bounds
  // visibilities still address a valid location; for those the select
  // yields undef<double>() instead of the product.
  // NOTE(review): presumably Halide skips stores whose value is undef --
  // see the Halide undef documentation.
  uvg(rcmplx,
      rgcfx + clamp(uv(_U, rvis), min_u, max_u),
      rgcfy + clamp(uv(_V, rvis), min_v, max_v))
    += select(inBound(rvis),
              (visC * Complex(gcf(rgcfx, rgcfy, rvis))).unpack(rcmplx),
              undef<double>());

  // Layout constraints. Each one is applied only when the corresponding bit
  // of cfg.dim is set.
  if (cfg.dim & (1 << _VIS0)) vis.set_min(0,0).set_stride(0,1).set_extent(0,_VIS_FIELDS);
  if (cfg.dim & (1 << _VIS1)) vis.set_stride(1,_VIS_FIELDS);

  if (cfg.dim & (1 << _GCF0)) gcf_fused.set_min(0,0).set_stride(0,1).set_extent(0,_CPLX_FIELDS);
  if (cfg.dim & (1 << _GCF1)) gcf_fused.set_min(1,0).set_stride(1,_CPLX_FIELDS).set_extent(1,cfg.gcfSize);
  if (cfg.dim & (1 << _GCF2)) gcf_fused.set_min(2,0).set_stride(2,_CPLX_FIELDS*cfg.gcfSize).set_extent(2,cfg.gcfSize);
  if (cfg.dim & (1 << _GCF3)) gcf_fused.set_min(3,0).set_stride(3,_CPLX_FIELDS*cfg.gcfSize*cfg.gcfSize).set_extent(3,OVER*OVER);

  if (cfg.dim & (1 << _UVG0)) uvg.output_buffer().set_stride(0,1).set_extent(0,_CPLX_FIELDS);
  if (cfg.dim & (1 << _UVG1)) uvg.output_buffer().set_stride(1,_CPLX_FIELDS);

  // Compute UV & oversampling coordinates per visibility
  // (all three helpers are scheduled at uvg's rstep loop)
  overc.compute_at(uvg, rstep).vectorize(uvdim);
  uv.compute_at(uvg, rstep).vectorize(uvdim);
  inBound.compute_at(uvg, rstep);

  // Schedule of the update step, selected by cfg.upd. A value matching none
  // of the cases leaves the update unscheduled, like _UPD_NONE.
  RVar rgcfxc("rgcfxc");
  switch(cfg.upd)
  {
  case _UPD_NONE:
      break;
  case _UPD_VECT:
      // Vectorize across the complex components.
      uvg.update()
          .allow_race_conditions()
          .vectorize(rcmplx);
      break;
  case _UPD_FUSE:
      // Fuse the GCF x axis with the complex components, then vectorize the
      // fused variable by cfg.vector.
      uvg.update()
          .allow_race_conditions()
          .fuse(rgcfx, rcmplx, rgcfxc)
          .vectorize(rgcfxc, cfg.vector);
      break;
  case _UPD_FUSE_UNROLL:
      // As _UPD_FUSE, plus unrolling by cfg.gcfSize * 2 / cfg.vector.
      // NOTE(review): the literal 2 presumably stands for _CPLX_FIELDS --
      // confirm.
      uvg.update()
          .allow_race_conditions()
          .fuse(rgcfx, rcmplx, rgcfxc)
          .vectorize(rgcfxc, cfg.vector)
          .unroll(rgcfxc, cfg.gcfSize * 2 / cfg.vector);
      break;
  case _UPD_UNROLL:
      // Unroll across the complex components only.
      uvg.update()
          .unroll(rcmplx);
      break;
  }
}