Esempio n. 1
0
/*
 * Based on the implementation of the THTensor_(indexCopy) in torch7
 */
static void THCudaTensor_indexFill(THCudaTensor *tensor, int dim, THLongTensor *index, float val)
{
  long i, numel;
  THCudaTensor *tSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < tensor->nDimension,4,"Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);
  
  for (i=0; i<numel; i++)
  {
    if (tensor->nDimension > 1 )
    {
      // create a new CudaTensor
      tSlice = THCudaTensor_new();
      // set its storage to point to the corresponding storage in tensor
      THCudaTensor_select(tSlice, tensor,dim,index_data[i]-1);
      THCudaTensor_fill(tSlice, val);
      THCudaTensor_free(tSlice);
    }
    else
    {
      THCudaTensor_set1d(tensor,index_data[i]-1,val);
    }
  }
  THLongTensor_free(index);
}
Esempio n. 2
0
/*
 * Based on the implementation of the THTensor_(indexCopy) in torch7
 */
static void THCudaTensor_indexCopy(THCudaTensor *tensor, int dim, THLongTensor *index, THCudaTensor *src)
{
  long i, numel;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < src->nDimension,4,"Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i=0; i<numel; i++)
  {
    if (tensor->nDimension > 1 )
    {
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1);
      THCudaTensor_select(sSlice, src, dim, i);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    {
      // It's faster to copy a float from an address in the device to another address in the device than 
      // retrieving it to the host memory and recopy it to the device memory
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + index_data[i]-1,\
        src->storage->data + src->storageOffset + i, sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }
  THLongTensor_free(index);
}
Esempio n. 3
0
static void load_array_to_lua(lua_State *L, cnpy::NpyArray& arr){
	int ndims = arr.shape.size();

	//based on code from mattorch with stride fix
	int k;
	THLongStorage *size = THLongStorage_newWithSize(ndims);
	THLongStorage *stride = THLongStorage_newWithSize(ndims);
	for (k=0; k<ndims; k++) {
		THLongStorage_set(size, k, arr.shape[k]);
		if (k > 0)
			THLongStorage_set(stride, ndims-k-1, arr.shape[ndims-k]*THLongStorage_get(stride,ndims-k));
		else
			THLongStorage_set(stride, ndims-k-1, 1);
	}

	void * tensorDataPtr = NULL;
	size_t numBytes = 0;

	if ( arr.arrayType == 'f' ){ // float32/64
		if ( arr.word_size == 4 ){ //float32
			THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride);
		    tensorDataPtr = (void *)(THFloatTensor_data(tensor));
		    numBytes = THFloatTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor"));
    
		}else if ( arr.word_size ==  8){ //float 64
			THDoubleTensor *tensor = THDoubleTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THDoubleTensor_data(tensor));
		    numBytes = THDoubleTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.DoubleTensor"));
		}
	}else if ( arr.arrayType == 'i' || arr.arrayType == 'u' ){ // does torch have unsigned types .. need to look
		if ( arr.word_size == 1 ){ //int8
			THByteTensor *tensor = THByteTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THByteTensor_data(tensor));
		    numBytes = THByteTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ByteTensor"));
    
		}else if ( arr.word_size == 2 ){ //int16
			THShortTensor *tensor = THShortTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THShortTensor_data(tensor));
		    numBytes = THShortTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ShortTensor"));
    
		}else if ( arr.word_size == 4 ){ //int32
			THIntTensor *tensor = THIntTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THIntTensor_data(tensor));
		    numBytes = THIntTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.IntTensor"));
    
		}else if ( arr.word_size ==  8){ //long 64
			THLongTensor *tensor = THLongTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THLongTensor_data(tensor));
		    numBytes = THLongTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.LongTensor"));
		}
	}else{
		printf("array type unsupported");
		throw std::runtime_error("unsupported data type");
	}

		// now copy the data
		assert(tensorDataPtr);
		memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes);


}