bool ConvolutionLayer::init() { if(initialized) return true; if((layer_param_.blobs_size() < 1) || (layer_param_.convolution_param().bias_term() && layer_param_.blobs_size() < 2)) return false; LOG(INFO) << "Starting to initialize " << this->name() << endl; const BlobProto& weights_blob = layer_param_.blobs(0); LOG(INFO) << "CONV WEIGHTS: " << endl << weights_blob.width() << endl << weights_blob.height() << endl << weights_blob.channels() << endl << weights_blob.num() << endl; weights_host_ = new float[weights_blob.width()*weights_blob.height()* weights_blob.channels()*weights_blob.num()]; Buffer weights_buf(type_of<float>(), weights_blob.width(), weights_blob.height(), weights_blob.channels(), weights_blob.num(), (uint8_t*) (weights_host_)); for(int x = 0, x_end = weights_blob.width(); x < x_end; x++) { for(int y = 0, y_end = weights_blob.height(); y < y_end; y++) { for(int c = 0, c_end = weights_blob.channels(); c < c_end; c++) { for(int n = 0, n_end = weights_blob.num(); n < n_end; n++) { int index = x + y*weights_blob.width() + c*weights_blob.width()* weights_blob.height() + n*weights_blob.width()* weights_blob.height()*weights_blob.channels(); weights_host_[index] = weights_blob.data(index); } } } } if(!weights_.defined()) weights_ = ImageParam(weights_buf.type(), 4); weights_.set(weights_buf); if(layer_param_.convolution_param().bias_term()) { const BlobProto bias_blob = layer_param_.blobs(1); bias_host_ = new float[bias_blob.width()*bias_blob.height()* bias_blob.channels()*bias_blob.num()]; Buffer bias_buf(type_of<float>(), bias_blob.width(), bias_blob.height(), bias_blob.channels(), bias_blob.num(),(uint8_t*) bias_host_); LOG(INFO) << "CONV BIAS" << endl << bias_blob.width() << endl << bias_blob.height() << endl << bias_blob.channels() << endl << bias_blob.num() << endl; int bias_size = bias_blob.width()*bias_blob.height()*bias_blob.channels()* bias_blob.num(); for(int i = 0; i < bias_size; i++) { bias_host_[i] = bias_blob.data(i); } if(!bias_.defined()) 
bias_ = ImageParam(bias_buf.type(), 4); bias_.set(bias_buf); } LOG(INFO) << "Completed initializing " << this->name() << endl; initialized = true; return initialized; }
/**
 * Releases everything init() set up: the image parameters are reset to
 * default-constructed ones and the host-side weight and bias arrays are
 * freed.
 */
ConvolutionLayer::~ConvolutionLayer() {
    // Reset the parameters before the host arrays they were given go away.
    if (weights_.defined()) {
        weights_ = ImageParam();
    }
    if (bias_.defined()) {
        bias_ = ImageParam();
    }

    // delete[] on a null pointer is a no-op, so no explicit guard is needed.
    delete[] weights_host_;
    weights_host_ = NULL;

    delete[] bias_host_;
    bias_host_ = NULL;
}
/**
 * Builds the gridding pipeline: declares the inputs, defines the output
 * grid `uvg` as a reduction over visibilities and GCF pixels, constrains
 * buffer layouts and schedules the update stage.
 *
 * @param cfg Configuration for this instance. The constructor reads
 *            cfg.gcfSize (GCF extent per axis), cfg.steps (visibilities per
 *            baseline chunk), cfg.cpos/xpos/ypos/vpos/blpos (position of
 *            each dimension inside the reduction domain), cfg.dim (bit mask
 *            selecting which buffer layout constraints to apply), cfg.upd
 *            (update-stage schedule) and cfg.vector (vector width).
 */
SGridder::SGridder(SGridderConfig cfg) {

  // ---- Inputs -------------------------------------------------------------
  scale = Param<double>("scale");
  grid_size = Param<int>("grid_size");
  vis = ImageParam(type_of<double>(), 2, "vis");

  // The GCF is an OxOxSxS array of complex numbers. Two of its dimensions
  // are fused into one because Halide only supports up to 4 dimensions.
  gcf_fused = ImageParam(type_of<double>(), 4, "gcf");

  // ---- Output -------------------------------------------------------------
  // The pure definition is left undefined so that the update below works on
  // the contents of the output buffer.
  F(uvg);
  uvg(cmplx, x, y) = undef<double>();

  // Accepted range for the top-left GCF corner in uv pixel coordinates,
  // derived from the bounds of the output buffer.
  Expr uLow = uvg.output_buffer().min(1);
  Expr uHigh = uvg.output_buffer().min(1) + uvg.output_buffer().extent(1)
             - cfg.gcfSize - 1;
  Expr vLow = uvg.output_buffer().min(2);
  Expr vHigh = uvg.output_buffer().min(2) + uvg.output_buffer().extent(2)
             - cfg.gcfSize - 1;

  // ---- Helpers ------------------------------------------------------------
  // Scaled coordinates, split into an oversampling index (from the
  // fractional part) and an integer grid position (from the floor).
  Func Q(uvs);
  F(uv);
  F(overc);
  uvs(uvdim, t) = vis(uvdim, t) * scale;

  Expr scaledCoord = uvs(uvdim, t);
  Expr fraction = scaledCoord - floor(scaledCoord);
  overc(uvdim, t) = clamp(cast<int>(round(OVER * fraction)), 0, OVER-1);
  uv(uvdim, t) = cast<int>(floor(scaledCoord) + grid_size / 2 - cfg.gcfSize / 2);

  // True for visibilities whose GCF corner lies inside the accepted range;
  // all others are skipped by the update.
  F(inBound);
  inBound(t) = uv(_U, t) >= uLow && uv(_U, t) <= uHigh &&
               uv(_V, t) >= vLow && uv(_V, t) <= vHigh;

  // Per-visibility GCF lookup: the two oversampling indices pick the slice
  // along the fused dimension.
  Func Q(gcf);
  Var suppx("suppx"), suppy("suppy"), overx("overx"), overy("overy");
  Expr gcfSlice = overc(_U, t) + OVER * overc(_V, t);
  gcf(suppx, suppy, t) =
      Complex(gcf_fused(_REAL, suppx, suppy, gcfSlice),
              gcf_fused(_IMAG, suppx, suppy, gcfSlice));

  // ---- Definition ---------------------------------------------------------
  // Reduction domain. Each of the five ranges is stored at the slot given by
  // the matching cfg.*pos index, so the configuration decides the nesting
  // order of the reduction variables.
  using Range = std::pair<Expr, Expr>;
  Range complexRange = {0, _CPLX_FIELDS};
  Range supportRange = {0, cfg.gcfSize};
  Range stepRange = {0, cfg.steps};
  Range baselineRange = {0, vis.height() / cfg.steps};

  std::vector<Range> ranges(5);
  ranges[cfg.cpos] = complexRange;
  ranges[cfg.xpos] = supportRange;
  ranges[cfg.ypos] = supportRange;
  ranges[cfg.vpos] = stepRange;
  ranges[cfg.blpos] = baselineRange;

  RDom red(ranges);
  rcmplx = red[cfg.cpos];
  rgcfx = red[cfg.xpos];
  rgcfy = red[cfg.ypos];
  rstep = red[cfg.vpos];
  rbl = red[cfg.blpos];

  // Index of the visibility addressed by the current (rbl, rstep) pair.
  Expr rvis = vis.top() + cfg.steps * rbl + rstep;

  // The visibility as a complex number.
  Complex visC(vis(_R, rvis), vis(_I, rvis));

  // Accumulate visibility * GCF into the grid. The target coordinates are
  // clamped to the accepted range; out-of-range visibilities contribute
  // undef instead of a value.
  Expr gridU = rgcfx + clamp(uv(_U, rvis), uLow, uHigh);
  Expr gridV = rgcfy + clamp(uv(_V, rvis), vLow, vHigh);
  uvg(rcmplx, gridU, gridV) +=
      select(inBound(rvis),
             (visC * Complex(gcf(rgcfx, rgcfy, rvis))).unpack(rcmplx),
             undef<double>());

  // ---- Buffer layout constraints (each enabled by one bit of cfg.dim) -----
  if (cfg.dim & (1 << _VIS0)) {
    vis.set_min(0,0).set_stride(0,1).set_extent(0,_VIS_FIELDS);
  }
  if (cfg.dim & (1 << _VIS1)) {
    vis.set_stride(1,_VIS_FIELDS);
  }
  if (cfg.dim & (1 << _GCF0)) {
    gcf_fused.set_min(0,0).set_stride(0,1).set_extent(0,_CPLX_FIELDS);
  }
  if (cfg.dim & (1 << _GCF1)) {
    gcf_fused.set_min(1,0).set_stride(1,_CPLX_FIELDS).set_extent(1,cfg.gcfSize);
  }
  if (cfg.dim & (1 << _GCF2)) {
    gcf_fused.set_min(2,0).set_stride(2,_CPLX_FIELDS*cfg.gcfSize).set_extent(2,cfg.gcfSize);
  }
  if (cfg.dim & (1 << _GCF3)) {
    gcf_fused.set_min(3,0).set_stride(3,_CPLX_FIELDS*cfg.gcfSize*cfg.gcfSize).set_extent(3,OVER*OVER);
  }
  if (cfg.dim & (1 << _UVG0)) {
    uvg.output_buffer().set_stride(0,1).set_extent(0,_CPLX_FIELDS);
  }
  if (cfg.dim & (1 << _UVG1)) {
    uvg.output_buffer().set_stride(1,_CPLX_FIELDS);
  }

  // ---- Schedule -----------------------------------------------------------
  // Grid position, oversampling index and bounds flag are computed once per
  // visibility, at the rstep loop of the update.
  overc.compute_at(uvg, rstep).vectorize(uvdim);
  uv.compute_at(uvg, rstep).vectorize(uvdim);
  inBound.compute_at(uvg, rstep);

  // Schedule of the update stage, selected by cfg.upd.
  RVar rgcfxc("rgcfxc");
  switch(cfg.upd) {
  case _UPD_NONE:
    // Leave the update stage with its default schedule.
    break;

  case _UPD_VECT:
    // Vectorize across the complex components.
    uvg.update()
      .allow_race_conditions()
      .vectorize(rcmplx);
    break;

  case _UPD_FUSE:
    // Fuse the GCF x dimension with the complex components, then vectorize
    // the fused variable by cfg.vector.
    uvg.update()
      .allow_race_conditions()
      .fuse(rgcfx, rcmplx, rgcfxc)
      .vectorize(rgcfxc, cfg.vector);
    break;

  case _UPD_FUSE_UNROLL:
    // As _UPD_FUSE, and additionally unroll the fused variable.
    uvg.update()
      .allow_race_conditions()
      .fuse(rgcfx, rcmplx, rgcfxc)
      .vectorize(rgcfxc, cfg.vector)
      .unroll(rgcfxc, cfg.gcfSize * 2 / cfg.vector);
    break;

  case _UPD_UNROLL:
    // Unroll across the complex components.
    uvg.update()
      .unroll(rcmplx);
    break;
  }
}