Пример #1
0
// Copy the extracted patches to CUDA memory, run the network on them and
// store the resulting descriptors in `descriptors`.
//
// Keep in mind that GPU memory is limited, so extracting too many patches at
// once might cause trouble. If you need to process a lot of patches, an
// efficient way is to divide the set into smaller equal parts and preallocate
// the CPU and GPU memory once; this function does so with fixed-size batches.
//
// state       - THC state handed to every CUDA tensor call
// net         - network whose forward() is run on each batch
// patches     - input patches; M*M floats are copied straight from each
//               cv::Mat::data pointer, so every patch is assumed to be a
//               continuous single-channel CV_32F M x M matrix (not checked)
// descriptors - output matrix; (re)allocated as N x feature_dim CV_32F unless
//               it already has exactly that shape and type. The output rows
//               of batch b are written starting at row b * batch_size.
//               Left untouched when `patches` is empty.
void extractDescriptors(THCState *state,
    cunn::Sequential::Ptr net,
    const std::vector<cv::Mat>& patches,
    cv::Mat& descriptors)
{
  const size_t batch_size = 128;
  const size_t N = patches.size();
  const size_t patch_len = static_cast<size_t>(M) * M;

  // Host staging buffer and device input tensor are allocated once and reused
  // for every batch.
  THFloatTensor *buffer = THFloatTensor_newWithSize4d(batch_size, 1, M, M);
  THCudaTensor *input = THCudaTensor_newWithSize4d(state, batch_size, 1, M, M);
  float *data = THFloatTensor_data(buffer);

  // Walk the patches in steps of batch_size. Integer arithmetic only: the
  // previous float-based ceil() bound is inexact for large N.
  for(size_t first = 0; first < N; first += batch_size)
  {
    // Number of real patches in this batch (smaller for the last batch).
    // Slots past `count` keep whatever the previous batch left there; their
    // outputs are computed but never copied out.
    const size_t count = std::min(batch_size, N - first);
    for(size_t k = 0; k < count; ++k)
      memcpy(data + k * patch_len, patches[first + k].data, sizeof(float) * patch_len);

    // copy the staged patches into the 4D CUDA tensor
    THCudaTensor_copyFloat(state, input, buffer);

    // propagate through the network
    THCudaTensor *output = net->forward(input);

    // copy descriptors back to host memory
    THFloatTensor *desc = THFloatTensor_newWithSize2d(output->size[0], output->size[1]);
    THFloatTensor_copyCuda(state, desc, output);

    // The feature dimension is only known after the first forward pass, so the
    // output matrix is validated here. The element type is checked as well:
    // a matrix with the right shape but a narrower type would be overrun by
    // the float memcpy below.
    const size_t feature_dim = output->size[1];
    const int want_rows = static_cast<int>(N);
    const int want_cols = static_cast<int>(feature_dim);
    if(descriptors.rows != want_rows || descriptors.cols != want_cols ||
        descriptors.type() != CV_32F)
      descriptors.create(want_rows, want_cols, CV_32F);

    // descriptors.data is a byte pointer, hence the explicit sizeof(float).
    // NOTE(review): a single memcpy assumes `descriptors` is continuous; that
    // holds after create(), but confirm callers never pass a non-continuous ROI.
    memcpy(descriptors.data + first * feature_dim * sizeof(float),
        THFloatTensor_data(desc),
        sizeof(float) * feature_dim * count);

    THFloatTensor_free(desc);
  }

  THCudaTensor_free(state, input);
  THFloatTensor_free(buffer);
}
Пример #2
0
/*
 * Lua binding: load the next batch of images and return them as a
 * torch.FloatTensor together with a table describing each image.
 *
 * Lua arguments:
 *   1: optional torch.FloatTensor to fill instead of allocating a new one.
 *      It is used only if it is 4-dimensional with size[1] == 3 and (when more
 *      than one size is configured) matches max x 3 x height x width of the
 *      requested size; otherwise it is ignored.
 *   2: 1-based index into sizes[]; values below 1 are treated as 1. Only
 *      index 1 loads new images; higher indices rescale the images already
 *      held in images[].
 *
 * Lua results (2 values): the tensor, and an array of tables with the fields
 * "filename", "width" and "height", one per image. Returns nothing when no
 * image could be loaded.
 *
 * Side effects on file-level state: may lower `max` when fewer images than
 * `max` are available, and, when only one size is configured, may overwrite
 * sizes[0] and `max` from the supplied tensor or from the first image.
 */
static int luafunc_load(lua_State *L)
{
	THFloatTensor *t = 0;
	const char *tname = luaT_typename(L, 1);
	int i, index = lua_tointeger(L, 2);

	if(max == 0)
		luaL_error(L, "fastimage.init: call init first");
	if(index > nsizes)
		luaL_error(L, "Invalid size index %d", index);
	/* Convert to a 0-based index, clamping anything below 1 to the first size */
	index--;
	if(index < 0)
		index = 0;
	if(tname && !strcmp(tname, "torch.FloatTensor"))
	{
		/* Borrowed pointer: the tensor is still owned by the Lua argument */
		t = luaT_toudata(L, 1, luaT_typenameid(L, "torch.FloatTensor"));
		if(t && t->nDimension == 4 && t->size[1] == 3)
		{
			if(nsizes == 1)
			{
				/* Single size: the tensor dictates output size and batch length.
				 * NOTE(review): max may grow here; confirm images[] was
				 * allocated large enough for t->size[0] entries. */
				sizes[0].width = t->size[3];
				sizes[0].height = t->size[2];
				max = t->size[0];
			} else if(sizes[index].width != t->size[3] || sizes[index].height != t->size[2] ||
				max != t->size[0])
				/* Must match the size that is actually written below
				 * (sizes[index], not sizes[0]); otherwise rgb_tofloat would be
				 * given dimensions that do not fit the tensor. */
				t = 0;
		} else t = 0;
	}
	if(!index)
	{
		/* First size requested: drop the previous bitmaps and load a new batch */
		for(i = 0; i < max; i++)
			if(images[i].bitmap)
			{
				free(images[i].bitmap);
				images[i].bitmap = 0;
			}
		for(i = 0; i < max; i++)
		{
			/* A nonzero return stops loading (presumably: no more images) */
			if(loadnextimage(images + i))
				break;
		}
		if(i == 0)
		{
			lprintf("Nothing found\n");
			return 0;
		}
		/* Fewer images than requested: shrink the batch for this and later calls */
		if(i < max)
			max = i;
	}
	/* A caller-supplied tensor is never pushed back directly: that would create
	 * a second Lua userdata for the same pointer without an extra reference.
	 * Instead push a freshly created view of its first `max` entries, which
	 * shares the data and is owned by the userdata pushed below. */
	if(t)
		t = THFloatTensor_newNarrow(t, 0, 0, max);
	for(i = 0; i < max; i++)
	{
		if(nsizes == 1 && (!sizes[0].width || !sizes[0].height))
		{
			/* No output size configured yet: adopt the size of this image */
			lprintf("Set width = %d, height = %d\n", images[i].width, images[i].height);
			sizes[0].width = images[i].width;
			sizes[0].height = images[i].height;
		}
		/* Allocated lazily because the size may only be known after the block above */
		if(!t)
			t = THFloatTensor_newWithSize4d(max, 3, sizes[index].height, sizes[index].width);
		uint8_t *rescaled = scale(images + i, sizes[index].width, sizes[index].height);
		rgb_tofloat(THFloatTensor_data(t) + i * t->stride[0], t->stride[1], t->stride[2], rescaled, sizes[index].width, sizes[index].height);
		/* scale() may hand back the original bitmap; only free a separate buffer */
		if(rescaled != images[i].bitmap)
			free(rescaled);
		if(nsizes == 1 && images[i].bitmap)
		{
			/* It's not necessary to keep all the images in memory, if there is only one size */
			free(images[i].bitmap);
			images[i].bitmap = 0;
		}
	}
	lprintf("%d x 3 x %d x %d tensor returned\n", i, sizes[index].height, sizes[index].width);
	luaT_pushudata(L, t, "torch.FloatTensor");
	/* Second result: array of { filename, width, height } tables, 1-based */
	lua_createtable(L, max, 0);
	for(i = 0; i < max; i++)
	{
		lua_pushinteger(L, i+1);

		lua_createtable(L, 0, 3);
		lua_pushstring(L, "filename");
		lua_pushstring(L, images[i].filename);
		lua_settable(L, -3);
		lua_pushstring(L, "width");
		lua_pushinteger(L, images[i].width);
		lua_settable(L, -3);
		lua_pushstring(L, "height");
		lua_pushinteger(L, images[i].height);
		lua_settable(L, -3);

		/* outer_table[i+1] = per-image table */
		lua_settable(L, -3);
	}
	return 2;
}