void SpatialBatchNormalization::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);

  Tensor<float>* in = TO_TENSOR_PTR(input.get());
  Tensor<float>* out = TO_TENSOR_PTR(output.get());

  RASSERT(in->dim() >= 3);
  RASSERT(in->size()[2] == nfeats_);

  if (output != nullptr && in->dim() != out->dim()) {
    output = nullptr;
  }

  // Check that the input and output sizes are the same.
  if (output != nullptr) {
    if (in->size()[0] != out->size()[0] ||
        in->size()[1] != out->size()[1] ||
        in->size()[2] != out->size()[2]) {
      output = nullptr;
    }
  }

  if (output == nullptr) {
    output.reset(new Tensor<float>(in->dim(), in->size()));
  }
}
Example n. 2
static __inline__ RunStore*
runstore_from_runs( Run*  runs )
{
    RASSERT(runs != RUNS_EMPTY);
    RASSERT(runs != RUNS_RECT);
    return (RunStore*)runs - 1;
}
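For readers unfamiliar with the layout this relies on, here is a standalone sketch of the header-before-payload trick: the RunStore header and the Run array share one allocation, so the header can be recovered from the runs pointer with pointer arithmetic. The types and field names below are illustrative assumptions, not the emulator's actual definitions.

#include <cstdio>
#include <cstdlib>

typedef short Run;
struct RunStore { int count; int refcount; };

// The runs start immediately after the header in the same allocation.
static Run* store_to_runs(RunStore* s) { return reinterpret_cast<Run*>(s + 1); }
// So stepping one RunStore back from the runs pointer recovers the header.
static RunStore* runs_to_store(Run* runs) {
  return reinterpret_cast<RunStore*>(runs) - 1;
}

int main() {
  const int count = 8;
  RunStore* s = static_cast<RunStore*>(
      std::malloc(sizeof(*s) + sizeof(Run) * count));
  s->count = count;
  s->refcount = 1;

  Run* runs = store_to_runs(s);
  std::printf("recovered count = %d\n", runs_to_store(runs)->count);  // 8

  std::free(s);
  return 0;
}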
Example n. 3
static void
region_operator_init( RegionOperator*  o,
                      Region*          r1,
                      Region*          r2 )
{
    int  run1_count, run2_count;
    int  maxruns;

    RASSERT( !region_isEmpty(r1) );
    RASSERT( !region_isEmpty(r2) );

    if (region_isRect(r1)) {
        run1_count = RUNS_RECT_COUNT;
        o->runs1   = o->runs1_rect;
        runs_set_rect( o->runs1, &r1->bounds );
    } else {
        o->runs1   = r1->runs;
        run1_count = runs_get_count(r1->runs);
    }

    if (region_isRect(r2)) {
        run2_count = RUNS_RECT_COUNT;
        o->runs2   = o->runs2_rect;
        runs_set_rect( o->runs2, &r2->bounds );
    } else {
        o->runs2   = r2->runs;
        run2_count = runs_get_count(r2->runs);
    }

    maxruns  = run1_count < run2_count ? run2_count : run1_count;
    o->store = runstore_alloc( 3*maxruns );
    o->runs_base = runstore_to_runs(o->store);
}
Example n. 4
void
skin_region_translate( SkinRegion*  r, int  dx, int  dy )
{
    Run*  runs;

    if (region_isEmpty(r))
        return;

    skin_rect_translate( &r->bounds, dx, dy );
    if (region_isRect(r))
        return;

    runs = region_edit(r);
    while (runs[0] != YSENTINEL) {
        int  ytop = runs[0];
        int  ybot = runs[1];

        RASSERT(ybot != YSENTINEL);
        runs[0] = (Run)(ytop + dy);
        runs[1] = (Run)(ybot + dy);
        runs += 2;
        while (runs[0] != XSENTINEL) {
            int  xleft  = runs[0];
            int  xright = runs[1];
            RASSERT(xright != XSENTINEL);
            runs[0] = (Run)(xleft + dx);
            runs[1] = (Run)(xright + dx);
            runs += 2;
        }
        runs += 1;
    }
}
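A minimal standalone sketch of the run-list layout this function walks: each band is {ytop, ybot, xleft, xright, ..., XSENTINEL} and the whole list ends with YSENTINEL. The sentinel values and sample data below are placeholders, not the emulator's real constants.

#include <cstdio>

typedef short Run;
static const Run XSENTINEL = 0x7fff;
static const Run YSENTINEL = 0x7fff;

int main() {
  // One band covering y in [10,20) with x spans [0,5) and [8,12).
  Run runs[] = {10, 20, 0, 5, 8, 12, XSENTINEL, YSENTINEL};

  const Run* r = runs;
  while (r[0] != YSENTINEL) {
    const int ytop = r[0];
    const int ybot = r[1];
    r += 2;
    while (r[0] != XSENTINEL) {
      std::printf("rect x=[%d,%d) y=[%d,%d)\n", r[0], r[1], ytop, ybot);
      r += 2;
    }
    r += 1;  // skip the band's XSENTINEL
  }
  return 0;
}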
Example n. 5
void OpenCLContext::runKernel(const uint32_t device_index, const uint32_t dim,
                              const uint32_t* global_work_size,
                              const bool blocking) {
  // You must call OpenCL::useKernel() first.
  RASSERT(cur_kernel_ != nullptr);
  RASSERT(device_index < devices_.size());
  RASSERT(dim <= 3);  // OpenCL doesn't support greater than 3 dims!

  cl::NDRange offset = cl::NullRange;
  cl::NDRange global_work;
  cl::NDRange local_work = cl::NullRange;  // Let OpenCL Choose
  switch (dim) {
    case 1:
      global_work = cl::NDRange(global_work_size[0]);
      break;
    case 2:
      global_work = cl::NDRange(global_work_size[0], global_work_size[1]);
      break;
    case 3:
      global_work = cl::NDRange(global_work_size[0], global_work_size[1],
                                global_work_size[2]);
      break;
  }
  cl::Event cur_event;
  CHECK_ERROR(queues_[device_index].enqueueNDRangeKernel(
      cur_kernel_->kernel(), offset, global_work, local_work, nullptr,
      &cur_event));

  if (blocking) {
    cur_event.wait();
  }
}
void SpatialLPPooling::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);
  Tensor<float>* in = TO_TENSOR_PTR(input.get());
  RASSERT(in->dim() == 2 || in->dim() == 3);

  if (output != nullptr && TO_TENSOR_PTR(output.get())->dim() != in->dim()) {
    // Input dimension has changed!
    cleanup();
  }

  if (output != nullptr) {
    // Check that the dimensions above the lowest 2 match
    for (uint32_t i = 2; i < in->dim() && output != nullptr; i++) {
      if (TO_TENSOR_PTR(output.get())->size()[i] != in->size()[i]) {
        cleanup();
      }
    }
  }

  if (output != nullptr) {
    // Check that the lowest 2 dimensions are the correct size
    if (TO_TENSOR_PTR(output.get())->size()[0] != in->size()[0] / poolsize_u_ ||
        TO_TENSOR_PTR(output.get())->size()[1] != in->size()[1] / poolsize_v_) {
      cleanup();
    }
  }

  if (output == nullptr) {
    // Check that the width and height are multiples of the poolsize.
    RASSERT(in->size()[0] % poolsize_u_ == 0 &&
            in->size()[1] % poolsize_v_ == 0);

    std::unique_ptr<uint32_t[]> out_size(new uint32_t[in->dim()]);
    out_size[0] = in->size()[0] / poolsize_u_;
    out_size[1] = in->size()[1] / poolsize_v_;
    for (uint32_t i = 2; i < in->dim(); i++) {
      out_size[i] = in->size()[i];
    }

    output.reset(new Tensor<float>(in->dim(), out_size.get()));
    input_cpu_.reset(new float[in->nelems()]);
    output_cpu_.reset(new float[TO_TENSOR_PTR(output.get())->nelems()]);
  }

  uint32_t n_threads = 1;
  if (in->dim() > 2) {
    n_threads = TO_TENSOR_PTR(output.get())->size()[2];
  }
  if (thread_cbs_.size() != n_threads) {
    thread_cbs_.clear();
    for (uint32_t f = 0; f < n_threads; f++) {
      thread_cbs_.push_back(std::unique_ptr<jcl::threading::Callback<void>>(
          MakeCallableMany(&SpatialLPPooling::forwardPropThread, this, f)));
    }
  }
}
Example n. 7
void SelectTable::forwardProp(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TABLE_DATA);

  Table* in = (Table*)input.get();

  // Check that the input table isn't too small.
  RASSERT(in->tableSize() > index_);

  output = (*in)(index_);
}
Example n. 8
void ParallelTable::forwardProp(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TABLE_DATA);

  Table* in = (Table*)input.get();
  // Make sure table size matches the number of parallel stages:
  RASSERT(in->tableSize() == network_.size());
  for (uint32_t i = 0; i < network_.size(); i++) {
    network_[i]->forwardProp((*in)(i));
  }
  initOutput();  // Init output just copies the pointers from the output
                 // of all the parallel stages and fills up a table with them
}
Example n. 9
uint32_t OpenCLContext::queryMaxWorkgroupSizeForCurKernel(
    const uint32_t device_index) {
  // You must call OpenCL::useKernel() first.
  RASSERT(cur_kernel_ != nullptr);
  RASSERT(device_index < devices_.size());

  size_t max_workgroup_size;
  cl_int rc = cur_kernel_->kernel().getWorkGroupInfo<size_t>(
      devices_[device_index], CL_KERNEL_WORK_GROUP_SIZE, &max_workgroup_size);
  RASSERT(rc == CL_SUCCESS);
  return (uint32_t)max_workgroup_size;
}
Example n. 10
void JoinTable::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TABLE_DATA);  // Table expected

  Table* in = TO_TABLE_PTR(input.get());

  RASSERT(in->tableSize() > 0);

  // Check that it is a table of FloatTensors
  for (uint32_t i = 0; i < in->tableSize(); i++) {
    // Table of float tensors expected
    RASSERT((*in)(i)->type() == TENSOR_DATA);
  }

  uint32_t dim = TO_TENSOR_PTR((*in)(0).get())->dim();
  RASSERT(dim > dimension_);  // Otherwise input is smaller than join dimension
  uint32_t jdim = dim - dimension_ - 1;  // dimension_=0 is the top dim

  // Make sure the dimensions OTHER than the join dimension are all the same
  for (uint32_t d = 0; d < dim; d++) {
    if (d != jdim) {
      for (uint32_t j = 1; j < in->tableSize(); j++) {
        // sizes must match
        RASSERT(TO_TENSOR_PTR((*in)(j).get())->size()[d] ==
               TO_TENSOR_PTR((*in)(0).get())->size()[d]);
      }
      if (output != nullptr &&
          TO_TENSOR_PTR(output.get())->size()[d] !=
              TO_TENSOR_PTR((*in)(0).get())->size()[d]) {
        output = nullptr;
      }
    }
  }

  // Sum the join-dimension size across all table entries.
  uint32_t nelems_jdim = 0;
  for (uint32_t j = 0; j < in->tableSize(); j++) {
    nelems_jdim += TO_TENSOR_PTR((*in)(j).get())->size()[jdim];
  }

  if (output != nullptr &&
      TO_TENSOR_PTR(output.get())->size()[jdim] != nelems_jdim) {
    output = nullptr;
  }

  if (output == nullptr) {
    std::unique_ptr<uint32_t[]> size(new uint32_t[dim]);
    memcpy(size.get(), TO_TENSOR_PTR((*in)(0).get())->size(),
           sizeof(size[0]) * dim);
    size[jdim] = nelems_jdim;  // The join dimension gets the summed size.
    output = std::shared_ptr<TorchData>(new Tensor<float>(dim, size.get()));
  }
}
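A standalone sketch (not jtorch code) of the size bookkeeping above: joining two 3D tensors along dimension_ = 0 (the Torch "top" dimension) sums the sizes at jdim = dim - dimension_ - 1, while every other dimension must match. The sample sizes are arbitrary.

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t dim = 3;
  const uint32_t dimension_ = 0;
  const uint32_t a[dim] = {64, 48, 16};  // {w, h, f1}
  const uint32_t b[dim] = {64, 48, 8};   // {w, h, f2}
  const uint32_t jdim = dim - dimension_ - 1;  // == 2

  uint32_t out[dim];
  for (uint32_t d = 0; d < dim; d++) {
    out[d] = a[d];  // non-join dimensions are copied (and must match b's)
  }
  out[jdim] = a[jdim] + b[jdim];  // 16 + 8 = 24 along the join dimension

  std::printf("output size: {%u, %u, %u}\n", (unsigned)out[0],
              (unsigned)out[1], (unsigned)out[2]);
  return 0;
}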
Example n. 11
void Select::forwardProp(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);

  // For now we only support the Select operation along the outer dimension.
  // In torch indexing this is always 1.
  RASSERT(this->dimension_ == 1);

  if (src_tensor_ != input.get()) {
    // Only create the tensor slice if the input has changed.
    src_tensor_ = TO_TENSOR_PTR(input.get());

    // Note the index is torch 1-indexed.
    output = Tensor<float>::selectOuterDim(*src_tensor_, this->index_ - 1);
  }
}
Example n. 12
static Run*
runs_copy_scanline( Run*  dst, Run*  src )
{
    RASSERT(src[0] != YSENTINEL);
    RASSERT(src[1] != YSENTINEL);
    dst[0] = src[0];
    dst[1] = src[1];
    src += 2;
    dst += 2;
    do {
        *dst++ = *src++;
    }
    while (src[-1] != XSENTINEL);
    return dst;
}
void comm_mutex_rw::rd_lock()
{
	/// possible deadlocks:	th1:R-lock, th2:wr_lock(), th1:rd_lock()
	DWORD t = GetCurrentThreadId();
	if( Waccess(t) ) {						/// already W-locked
        _nRin++;
		RASSERT( _nWlocks != 0 );
		return;
	}

	comm_mutex_rw_LOCK_IN;
    _nRin++;
    RASSERT( _nWlocks == 0 );
	ResetEvent( (HANDLE)_cndAccess );
    comm_mutex_rw_UNLOCK_IN;
}
bool comm_mutex_rw::try_wr_lock()
{
#if( _WIN32_WINNT >= 0x0400 )
	DWORD t = GetCurrentThreadId();
	if( Waccess(t) ) {									/// already W-locked ==> guard only one W-lock
		_nWlocks++;
		return true;
	}

	if( TryEnterCriticalSection((CRITICAL_SECTION*)&_mxIN) ) {			/// lock in mutex
		RASSERT( _nWlocks == 0 );

		if( TryEnterCriticalSection((CRITICAL_SECTION*)&_mxOUT) ) {	    /// lock out mutex
			if( Raccess() ) {						/// no readers ?
				_nWlocks++;
				_W_owner = t;
				comm_mutex_rw_UNLOCK_OUT;
				return true;
			}
			comm_mutex_rw_UNLOCK_OUT;
		}
		comm_mutex_rw_UNLOCK_IN;
	}
#endif

	return false;
}
Example n. 15
CLDevice OpenCLContext::CLDeviceType2CLDevice(const cl_device_type device) {
  CLDevice ret;
  switch (device) {
    case CL_DEVICE_TYPE_DEFAULT:
      ret = CLDeviceDefault;
      break;
    case CL_DEVICE_TYPE_CPU:
      ret = CLDeviceCPU;
      break;
    case CL_DEVICE_TYPE_GPU:
      ret = CLDeviceGPU;
      break;
    case CL_DEVICE_TYPE_ACCELERATOR:
      ret = CLDeviceAccelerator;
      break;
    case CL_DEVICE_TYPE_ALL:
      ret = CLDeviceAll;
      break;
    default:
      std::cout << "Invalid enumerant" << std::endl;
      RASSERT(false);
      ret = CLDeviceDefault;
      break;
  }
  return ret;
}
Example n. 16
void InitJTorch(const bool use_cpu, const uint32_t requested_deviceid,
                const bool verbose_startup) {
  std::lock_guard<std::mutex> lck(cl_context_lock_);
  // Check we haven't already called init.
  RASSERT(cl_context == nullptr);

  if (verbose_startup) {
    std::cout << "Valid OpenCL devices attached:" << std::endl;
    const uint32_t num_devices = jcl::OpenCLContext::printDevices();
    static_cast<void>(num_devices);
  }

  jcl::CLDevice device = use_cpu ? jcl::CLDeviceCPU : jcl::CLDeviceGPU;
  jcl::CLVendor vendor = jcl::CLVendorAny;

  const bool device_exists =
      jcl::OpenCLContext::queryDeviceExists(device, vendor);
  if (!device_exists) {
    if (use_cpu) {
      std::cerr << "No CPU devices attached.";
    } else {
      std::cerr << "No GPU devices attached.";
    }
  }
  RASSERT(device_exists);

  // Otherwise, initialize the context.
  cl_context.reset(new jcl::OpenCLContext());
  cl_context->init(device, jcl::CLVendorAny, verbose_startup);

  // Make sure the user is requesting a device id that exists.
  RASSERT(requested_deviceid < cl_context->getNumDevices());
  deviceid = requested_deviceid;

  std::cout << "Jtorch is using device " << deviceid << ": "
            << cl_context->getDeviceName(deviceid) << std::endl;

  // Startup clblas.
  // TODO(tompson): I have NO idea what device ID this will run on.
  const cl_int blas_ret = clblasSetup();
  const bool blas_ok = (blas_ret == CL_SUCCESS);
  if (!blas_ok) {
    std::cout << "ERROR - InitJTorchInternal: clblasSetup returned error: "
              << jcl::OpenCLContext::getErrorString(blas_ret);
  }
  RASSERT(blas_ok);
}
Example n. 17
static void
runs_coalesce_band( Run*  *psrc_spans, Run*  *pdst_spans, SkinBox*  minmax )
{
    Run*  sspan  = *psrc_spans;
    Run*  dspan  = *pdst_spans;
    int   pleft  = sspan[0];
    int   pright = sspan[1];
    int   xleft, xright;

    RASSERT(pleft != XSENTINEL);
    RASSERT(pright != XSENTINEL);
    RASSERT(pleft < pright);

    if (pleft < minmax->x1) minmax->x1 = pleft;

    sspan += 2;
    xleft  = sspan[0];

    while (xleft != XSENTINEL)
    {
        xright = sspan[1];
        RASSERT(xright != XSENTINEL);
        RASSERT(xleft < xright);

        if (xleft == pright) {
            pright = xright;
        } else {
            dspan[0] = (Run) pleft;
            dspan[1] = (Run) pright;
            dspan   += 2;
        }
        sspan += 2;
        xleft = sspan[0];
    }
    dspan[0] = (Run) pleft;
    dspan[1] = (Run) pright;
    dspan[2] = XSENTINEL;
    dspan   += 3;
    sspan   += 1;  /* skip XSENTINEL */

    if (pright > minmax->x2) minmax->x2 = pright;

    *psrc_spans = sspan;
    *pdst_spans = dspan;
}
void comm_mutex_rw::wr_unlock()
{
	RASSERT( _nWlocks );
	_nWlocks--;
	if( _nWlocks == 0 ) {
		_W_owner = 0;
        comm_mutex_rw_UNLOCK_IN;
	}
}
Example n. 19
static RunStore*
runstore_alloc( int   count )
{
    RunStore*  s = malloc( sizeof(*s) + sizeof(Run)*count );
    RASSERT(s != NULL);
    s->count    = count;
    s->refcount = 1;
    return s;
}
Example n. 20
static int
runs_coalesce( Run*  dst, Run*  src, SkinBox*  minmax )
{
    Run*  prev = NULL;
    Run*  dst0 = dst;
    int   ytop = src[0];
    int   ybot;

    while (ytop != YSENTINEL)
    {
        Run*  sspan = src + 2;
        Run*  dspan = dst + 2;

        ybot = src[1];
        RASSERT( ytop < ybot );
        RASSERT( ybot != YSENTINEL );
        RASSERT( src[2] != XSENTINEL );

        if (ytop < minmax->y1) minmax->y1 = ytop;
        if (ybot > minmax->y2) minmax->y2 = ybot;

        dst[0] = (Run) ytop;
        dst[1] = (Run) ybot;

        runs_coalesce_band( &sspan, &dspan, minmax );

        if (prev && prev[1] == dst[0] && (dst-prev) == (dspan-dst) &&
                !memcmp(prev+2, dst+2, (dspan-dst-2)*sizeof(Run)))
        {
            /* coalesce two identical bands */
            prev[1] = dst[1];
        }
        else
        {
            prev = dst;
            dst  = dspan;
        }
        src  = sspan;
        ytop = src[0];
    }
    dst[0] = YSENTINEL;
    return (dst + 1 - dst0);
}
Example n. 21
void SpatialMaxPooling::init(std::shared_ptr<TorchData> input) {
  RASSERT(input->type() == TorchDataType::TENSOR_DATA);
  Tensor<float>* in = TO_TENSOR_PTR(input.get());
  RASSERT(in->dim() == 2 || in->dim() == 3);

  // We essentially use ceil_mode = false from Torch.
  const uint32_t iwidth = in->size()[0];
  const uint32_t iheight = in->size()[1];
  uint32_t oheight = (long)(floor((float)(iheight - kh_ + 2*padh_) / dh_)) + 1;
  uint32_t owidth  = (long)(floor((float)(iwidth  - kw_ + 2*padw_) / dw_)) + 1;

  if (output != nullptr && TO_TENSOR_PTR(output.get())->dim() != in->dim()) {
    // Input dimension has changed!
    output = nullptr;
  }

  if (output != nullptr) {
    // Check that the dimensions above the lowest 2 match
    for (uint32_t i = 2; i < in->dim() && output != nullptr; i++) {
      if (TO_TENSOR_PTR(output.get())->size()[i] != in->size()[i]) {
        output = nullptr;
      }
    }
  }

  if (output != nullptr) {
    // Check that the lowest 2 dimensions are the correct size
    if (TO_TENSOR_PTR(output.get())->size()[0] != owidth ||
        TO_TENSOR_PTR(output.get())->size()[1] != oheight) {
      output = nullptr;
    }
  }

  if (output == nullptr) {
    std::unique_ptr<uint32_t[]> out_size(new uint32_t[in->dim()]);
    out_size[0] = owidth;
    out_size[1] = oheight;
    for (uint32_t i = 2; i < in->dim(); i++) {
      out_size[i] = in->size()[i];
    }
    output.reset(new Tensor<float>(in->dim(), out_size.get()));
  }
}
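A standalone sketch of the ceil_mode = false output-size formula used above, out = floor((in - kernel + 2*pad) / stride) + 1. The helper name and the sample numbers are illustrative, not jtorch API.

#include <cmath>
#include <cstdint>
#include <cstdio>

// Output extent of a pooled dimension when the trailing partial window is
// dropped (ceil_mode = false).
static uint32_t PooledSize(uint32_t in, uint32_t kernel, uint32_t pad,
                           uint32_t stride) {
  return static_cast<uint32_t>(
             std::floor(static_cast<float>(in - kernel + 2 * pad) / stride)) +
         1;
}

int main() {
  // A 7x7 input with a 2x2 window, stride 2 and no padding gives 3x3:
  // the last row/column does not form a full window and is discarded.
  std::printf("%u x %u\n", (unsigned)PooledSize(7, 2, 0, 2),
              (unsigned)PooledSize(7, 2, 0, 2));
  return 0;
}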
Example n. 22
void OpenCLContext::runKernel(const uint32_t device_index, const uint32_t dim,
                              const uint32_t* global_work_size,
                              const uint32_t* local_work_size,
                              const bool blocking) {
  // You must call OpenCL::useKernel() first.
  RASSERT(cur_kernel_ != nullptr);
  RASSERT(device_index < devices_.size());
  RASSERT(dim <= 3);  // OpenCL doesn't support greater than 3 dims!

  uint32_t total_worksize = 1;
  for (uint32_t i = 0; i < dim; i++) {
    // Check that: Global workgroup size is evenly divisible by the local work
    // group size!
    RASSERT((global_work_size[i] % local_work_size[i]) == 0);
    total_worksize *= local_work_size[i];
    // Check that: Local workgroup size is not greater than
    // devices_max_workitem_size_
    RASSERT((uint32_t)local_work_size[i] <=
            (uint32_t)devices_max_workitem_size_[device_index][i]);
  }
  // Check that: Local workgroup size is not greater than
  // CL_DEVICE_MAX_WORK_GROUP_SIZE!
  RASSERT(total_worksize <=
          (uint32_t)devices_max_workgroup_size_[device_index]);
  uint32_t max_size = queryMaxWorkgroupSizeForCurKernel(device_index);
  // Check that: Local workgroup size is not greater than
  // CL_KERNEL_WORK_GROUP_SIZE!
  RASSERT(total_worksize <= (uint32_t)max_size);

  cl::NDRange offset = cl::NullRange;
  cl::NDRange global_work;
  cl::NDRange local_work;
  switch (dim) {
    case 1:
      global_work = cl::NDRange(global_work_size[0]);
      local_work = cl::NDRange(local_work_size[0]);
      break;
    case 2:
      global_work = cl::NDRange(global_work_size[0], global_work_size[1]);
      local_work = cl::NDRange(local_work_size[0], local_work_size[1]);
      break;
    case 3:
      global_work = cl::NDRange(global_work_size[0], global_work_size[1],
                                global_work_size[2]);
      local_work = cl::NDRange(local_work_size[0], local_work_size[1],
                               local_work_size[2]);
      break;
  }
  cl::Event cur_event;
  CHECK_ERROR(queues_[device_index].enqueueNDRangeKernel(
      cur_kernel_->kernel(), offset, global_work, local_work, nullptr,
      &cur_event));

  if (blocking) {
    cur_event.wait();
  }
}
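For context, the caller of this overload has to pick a local work size and make each global size a multiple of it, or the divisibility assert above fires. The sketch below shows one way to do that; the RoundUpToMultiple helper, the 16x16 tile, and the cl_context/deviceid names in the trailing comment are assumptions for illustration, not the library's API.

#include <cstdint>
#include <cstdio>

// Round a global work size up to a multiple of the chosen local work size.
static uint32_t RoundUpToMultiple(uint32_t x, uint32_t multiple) {
  return ((x + multiple - 1) / multiple) * multiple;
}

int main() {
  // e.g. a 37x61 image with a 16x16 local tile needs a 48x64 global range.
  std::printf("%u x %u\n", (unsigned)RoundUpToMultiple(37, 16),
              (unsigned)RoundUpToMultiple(61, 16));
  return 0;
}
// A hypothetical call site would then look like (setup omitted):
//   cl_context->runKernel(deviceid, 2, global_work_size, local_work_size,
//                         /*blocking=*/true);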
Example n. 23
static Run*
runs_next_scanline( Run*  runs )
{
    RASSERT(runs[0] != YSENTINEL && runs[1] != YSENTINEL );
    runs += 2;
    do {
        runs += 1;
    }
    while (runs[-1] != XSENTINEL);
    return runs;
}
// kernel1d default is either TorchStage::gaussian1D<float>(n) or just a
// vector of 1 values.
SpatialDivisiveNormalization::SpatialDivisiveNormalization(
    const std::shared_ptr<Tensor<float>> kernel, const float threshold)
    : TorchStage() {
  RASSERT(kernel->dim() <= 2);  // Averaging kernel must be 1D or 2D!

  // Averaging kernel must have odd size!
  RASSERT(kernel->size()[0] % 2 != 0 &&
         !(kernel->dim() == 2 && kernel->size()[1] % 2 == 0));

  kernel_.reset(Tensor<float>::clone(*kernel));
  kernel_norm_ = nullptr;  // Normalization is input size dependent

  output = nullptr;
  std_coef_.reset(nullptr);
  std_pass1_.reset(nullptr);
  std_pass2_.reset(nullptr);
  std_.reset(nullptr);

  threshold_ = threshold;
}
Example n. 25
wtnt_t *newwtnt()
{
	wtnt_t *wnptr;

	wnptr = valloc((size_t)sizeof(wtnt_t));

	RASSERT((wnptr != WNULL), "Cannot allocate space for write notices!");
	wnptr->more = WNULL;
	wnptr->wtntc = 0;
	return(wnptr);
}
Example n. 26
/*-----------------------------------------------------------*/
address_t newtwin()
{
	address_t twin;
	int allocsize;

	allocsize = Pagesize;
	twin = (address_t)valloc((size_t)allocsize);

	RASSERT((twin != (address_t)NULL), "Cannot allocate twin space!");
	return(twin);
}
// kernel1d default is either TorchStage::gaussian1D<float>(n) or just a
// vector of 1 values.
SpatialSubtractiveNormalization::SpatialSubtractiveNormalization(
    const std::shared_ptr<Tensor<float>> kernel)
    : TorchStage() {
  RASSERT(kernel->dim() <= 2);

  // Averaging kernel must have odd size!
  RASSERT(kernel->size()[0] % 2 != 0 &&
         !(kernel->dim() == 2 && kernel->size()[1] % 2 == 0));

  // Clone and normalize the input kernel
  kernel_.reset(Tensor<float>::clone(*kernel.get()));
  float sum = Tensor<float>::slowSum(*kernel_);
  Tensor<float>::div(*kernel_, sum);

  output = nullptr;
  mean_coef_.reset(nullptr);
  mean_pass1_.reset(nullptr);
  mean_pass2_.reset(nullptr);
  mean_.reset(nullptr);
}
void comm_mutex_rw::wr_lock()
{
	/// possible deadlocks:	th1:W-lock, th2:rd_lock(), th1:wr_lock()
	DWORD t = GetCurrentThreadId();
	if( Waccess(t) ) {              /// already W-locked
		_nWlocks++;
		return;
	}

	comm_mutex_rw_LOCK_IN;
	RASSERT( _nWlocks == 0 );

	comm_mutex_rw_LOCK_OUT;
	_nWlocks++;
	_W_owner = t;
	comm_mutex_rw_UNLOCK_OUT;

	int ret = WaitForSingleObject( (HANDLE)_cndAccess, INFINITE );
	RASSERT( ret == WAIT_OBJECT_0 );
	RASSERT( Raccess() );
}
Example n. 29
void Reshape::init(std::shared_ptr<TorchData> input) {
    RASSERT(input->type() == TorchDataType::TENSOR_DATA);
    Tensor<float>* in = TO_TENSOR_PTR(input.get());

    int32_t nelems = outNElem();
    static_cast<void>(nelems);
    // Check the input size.
    RASSERT(in->nelems() == static_cast<uint32_t>(nelems));

    if (output != nullptr) {
        Tensor<float>* out = TO_TENSOR_PTR(output.get());
        if (out->storage() != in->storage()) {
            // The tensors don't share the same storage! Reinitialize the view.
            output = nullptr;
        }
    }

    if (output == nullptr) {
        output = Tensor<float>::view(*in, odim_, osize_.get());
    }
}
bool comm_mutex_rw::try_rd_lock()
{
#if( _WIN32_WINNT >= 0x0400 )
	DWORD t = GetCurrentThreadId();
	if( Waccess(t) ) {						/// already W-locked
        _nRin++;
		RASSERT( _nWlocks != 0 );
		return true;
	}
	else {
		if( TryEnterCriticalSection((CRITICAL_SECTION*)&_mxIN) ) {
			_nRin++;
			RASSERT( _nWlocks == 0 );
			comm_mutex_rw_UNLOCK_IN;
			return true;
		}
	}
#endif

	return false;
}