Ejemplo n.º 1
0
/*
 * Re-point `self` at `storage` and give it a new offset and geometry.
 *
 * state          - passed through to the storage and resize helpers.
 * self           - tensor to modify; its storage must be non-NULL on entry.
 * storage        - storage to attach; it is retained when it differs from the
 *                  current one. NULL requests a fresh storage from
 *                  THCudaStorage_new().
 * storageOffset  - new offset into the storage; must be >= 0.
 * nDimension, size, stride - forwarded to THCudaTensor_rawResize().
 *
 * The offset is validated before anything is modified, so that an invalid
 * offset cannot leave the tensor with its storage already swapped while the
 * old offset and sizes are still in place.
 */
static void THCudaTensor_rawSet(THCState *state, THCudaTensor *self, THCudaStorage *storage, long storageOffset, int nDimension, long *size, long *stride)
{
  THAssert(self->storage != NULL);

  /* validate arguments before touching the tensor */
  if(storageOffset < 0)
    THError("Tensor: invalid storage offset");

  /* storage */
  if(self->storage != storage)
  {
    /* drop our reference to the storage being replaced */
    if(self->storage)
      THCudaStorage_free(state, self->storage);

    if(storage)
    {
      self->storage = storage;
      THCudaStorage_retain(state, self->storage);
    }
    else
      self->storage = THCudaStorage_new(state);
  }

  /* storageOffset */
  self->storageOffset = storageOffset;

  /* size and stride */
  THCudaTensor_rawResize(state, self, nDimension, size, stride);
}
Ejemplo n.º 2
0
/*
 * Put a tensor header into its initial state: one reference, a storage
 * obtained from THCudaStorage_new(), offset 0 and no dimensions.
 *
 * state - passed to THCudaStorage_new().
 * self  - tensor header to initialise; every field written here is
 *         overwritten unconditionally.
 */
static void THCudaTensor_rawInit(THCState *state, THCudaTensor *self)
{
  /* ownership */
  self->refcount = 1;
  self->storage = THCudaStorage_new(state);

  /* geometry: zero dimensions, no size/stride arrays yet */
  self->nDimension = 0;
  self->size = NULL;
  self->stride = NULL;
  self->storageOffset = 0;

  self->flag = TH_TENSOR_REFCOUNTED;
}
Ejemplo n.º 3
0
/*
 * Create a storage with room for `size` floats allocated via cudaMalloc().
 *
 * size - number of float elements; checked with THArgCheck to be >= 0.
 *
 * Returns a storage with refcount 1 and the REFCOUNTED, RESIZABLE and
 * FREEMEM flags set. When `size` is not positive, the result of
 * THCudaStorage_new() is returned instead and no device allocation is made
 * here.
 */
THCudaStorage* THCudaStorage_newWithSize(long size)
{
  THCudaStorage *result;

  THArgCheck(size >= 0, 2, "invalid size");

  /* Sizes that are not positive are handed to the plain constructor. */
  if(size <= 0)
    return THCudaStorage_new();

  result = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));

  /* NOTE(review): if THCudaCheck does not return on failure, the header
     allocated just above is presumably leaked - confirm and consider
     releasing it before the error is raised. */
  THCudaCheck(cudaMalloc((void**)&(result->data), size * sizeof(float)));

  result->refcount = 1;
  result->size = size;
  result->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;

  return result;
}
Ejemplo n.º 4
0
/*
 * Give `self` the requested sizes and strides, growing its storage if needed.
 *
 * state      - passed to the storage helpers.
 * self       - tensor to reshape.
 * nDimension - number of entries available in `size` (and `stride`).
 * size       - requested sizes; only the leading run of strictly positive
 *              entries is used, the first non-positive entry ends the list.
 * stride     - requested strides, or NULL. A NULL array or a negative entry
 *              means "derive this stride": 1 for the last dimension,
 *              otherwise size[d+1] * stride[d+1].
 *
 * Nothing is done when the effective dimension count and every size and
 * explicitly requested stride already match the tensor. The storage is
 * created if missing and resized only when the required element count
 * (including storageOffset) exceeds its current size.
 */
static void THCudaTensor_rawResize(THCState *state, THCudaTensor *self, int nDimension, long *size, long *stride)
{
  int dim;
  int usedDims = 0;
  int unchanged = 1;
  long extent;

  /* Count the usable dimensions and compare them with the current geometry. */
  for(dim = 0; dim < nDimension && size[dim] > 0; dim++)
  {
    usedDims++;

    if(dim < self->nDimension)
    {
      if(self->size[dim] != size[dim])
        unchanged = 0;

      if(stride && (stride[dim] >= 0) && (self->stride[dim] != stride[dim]))
        unchanged = 0;
    }
  }

  if(unchanged && (usedDims == self->nDimension))
    return;

  /* No usable dimension: the tensor becomes dimensionless. */
  if(usedDims <= 0)
  {
    self->nDimension = 0;
    return;
  }

  if(usedDims != self->nDimension)
  {
    self->size = (long*)THRealloc(self->size, sizeof(long)*usedDims);
    self->stride = (long*)THRealloc(self->stride, sizeof(long)*usedDims);
    self->nDimension = usedDims;
  }

  /* Walk from the innermost dimension outwards so that derived strides can
     use the already-written size/stride of the next dimension. */
  extent = 1;
  for(dim = usedDims-1; dim >= 0; dim--)
  {
    self->size[dim] = size[dim];

    if(stride && (stride[dim] >= 0))
      self->stride[dim] = stride[dim];
    else if(dim == usedDims-1)
      self->stride[dim] = 1;
    else
      self->stride[dim] = self->size[dim+1]*self->stride[dim+1];

    extent += (self->size[dim]-1)*self->stride[dim];
  }

  if(extent+self->storageOffset > 0)
  {
    if(!self->storage)
      self->storage = THCudaStorage_new(state);

    if(extent+self->storageOffset > self->storage->size)
      THCudaStorage_resize(state, self->storage, extent+self->storageOffset);
  }
}
Ejemplo n.º 5
0
/*
 * Run a batch of images through `network` and expose the output buffer.
 *
 * Each image is handed to a backend-specific loader (cuda_rgb2half /
 * cuda_rgb2float, OpenCL_LoadImage, or bgr2float / rgb2float), the resulting
 * storage is wrapped in a tensor sized 3 x height x width (batchsize == 1) or
 * batchsize x 3 x height x width, and forward() is called on it.
 *
 * network   - network to run; network->net, network->mean, network->std and
 *             network->out are used.
 * images    - array of per-image pointers; images[i] is passed to the loader.
 * batchsize - number of images.
 * width, height, stride - image geometry, forwarded to the loaders.
 * results   - receives out->storage->data of the output tensor.
 * outwidth, outheight - receive the last and second-to-last output sizes when
 *             the output has at least 3 dimensions, otherwise both are 1.
 * bgr       - on the CPU path selects bgr2float over rgb2float; forwarded as
 *             an argument on the CUDA and OpenCL paths.
 *
 * Returns THFloatTensor_nElement() of the output tensor.
 */
int THProcessImages(THNETWORK *network, unsigned char **images, int batchsize, int width, int height, int stride, float **results, int *outwidth, int *outheight, int bgr)
{
	int i;
	THFloatTensor *out, *t = 0;
	THFloatStorage *st;
	
	/* Stage 1: build the input storage (or, for OpenCL, the input tensor). */
#ifdef CUDNN
	if(network->net->cuda)
	{
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
		{
			st = THCudaStorage_new(batchsize * (width * height * 3));
			for(i = 0; i < batchsize; i++)
				cuda_rgb2half((unsigned short *)st->data + i * (width * height * 3), images[i], width, height, stride, network->mean, network->std, bgr);
		} else
#endif
		{
			st = THCudaStorage_new(batchsize * width * height * 3);
			for(i = 0; i < batchsize; i++)
				cuda_rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std, bgr);
		}
	} else
#endif
#ifdef OPENCL
	/* Only images[0] is loaded on this path; batchsize is not consulted. */
	/* NOTE(review): st is never assigned on this path. If OpenCL_LoadImage can
	   return 0, the `if(!t)` block below stores an uninitialized st - confirm. */
	if(network->net->opencl)
		t = OpenCL_LoadImage(images[0], width, height, stride, network->mean, network->std, bgr);
	else
#endif
	{
		/* CPU path: one slice of width * height * 3 floats per image. */
		st = THFloatStorage_new(batchsize * width * height * 3);
		if(bgr)
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				bgr2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
		else
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
	}
	/* Stage 2: wrap the storage in a tensor unless a loader already produced one. */
	if(!t)
	{
		t = THFloatTensor_new();
		t->storage = st;
		if(batchsize == 1)
		{
			/* single image: 3 x height x width */
			t->nDimension = 3;
			t->size[0] = 3;
			t->size[1] = height;
			t->size[2] = width;
			t->stride[0] = width * height;
			t->stride[1] = width;
			t->stride[2] = 1;
		} else {
			/* batch: batchsize x 3 x height x width */
			t->nDimension = 4;
			t->size[0] = batchsize;
			t->size[1] = 3;
			t->size[2] = height;
			t->size[3] = width;
			t->stride[0] = 3 * width * height;
			t->stride[1] = width * height;
			t->stride[2] = width;
			t->stride[3] = 1;
		}
	}
	/* Stage 3: run the network. On the CUDA and OpenCL paths the forward()
	   result is converted with a THFloatTensor_newFrom* helper, the previous
	   network->out is freed, and the converted tensor replaces it. */
#ifdef CUDNN
	if(network->net->cuda)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
			network->out = THFloatTensor_newFromHalfCudaTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(cl_datasize == 2)
			network->out = THFloatTensor_newFromHalfOpenCLImageTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
		out = forward(network->net, t);
	/* The input tensor is released here; `out` is not freed by this function. */
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}