/*
 * Forward pass of the matrix-multiplication based spatial convolution.
 *
 * module: layer whose SpatialConvolution fields supply the kernel size
 *         (kW, kH), stride (dW, dH), padding (padW, padH), plane counts,
 *         weight, bias and the scratch tensor finput.
 * input:  3D (planes x height x width) or 4D (batch x planes x height x
 *         width) tensor. A 3D input is reshaped in place to a batch of one
 *         for the duration of the call and reshaped back to 3D before
 *         returning.
 *
 * Returns module->output, resized to
 * [batch x] nOutputPlane x outputHeight x outputWidth.
 *
 * Calls THError when the computed output width or height is below 1.
 */
THFloatTensor *nn_SpatialConvolutionMM_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;

	THFloatTensor *finput = module->SpatialConvolution.finput;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias   = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	/* batch == 0 records that the caller passed a single 3D frame. */
	int batch = 1;
	if (input->nDimension == 3) {
		batch = 0;
		/* View the single frame as a batch of one so the loop below is uniform. */
		THFloatTensor_resize4d(input, 1, input->size[0], input->size[1], input->size[2]);
	}

	long batchSize = input->size[0];
	long nInputPlane  = module->SpatialConvolution.nInputPlane;
	long nOutputPlane = module->SpatialConvolution.nOutputPlane;
	long inputWidth   = input->size[3];
	long inputHeight  = input->size[2];
	long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
	long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;

	/* All six arguments are long, so the conversions must be %ld, not %d. */
	if (outputWidth < 1 || outputHeight < 1)
		THError("Given input size: (%ldx%ldx%ld). Calculated output size: (%ldx%ldx%ld). Output size is too small",
		nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

	/* finput holds, per batch element, a (kW*kH*nInputPlane) x (outputHeight*outputWidth)
	 * matrix -- presumably the unfolded input consumed by the frame routine. */
	THFloatTensor_resize3d(finput, batchSize, kW*kH*nInputPlane, outputHeight*outputWidth);
	THFloatTensor_resize4d(output, batchSize, nOutputPlane, outputHeight, outputWidth);

	long t;
	/* Each iteration works on its own slice of input/output/finput. */
#pragma omp parallel for if(batchSize >= 4) private(t)
	for (t = 0; t < batchSize; t++) {
		THFloatTensor *input_t = THFloatTensor_newSelect(input, 0, t);
		THFloatTensor *output_t = THFloatTensor_newSelect(output, 0, t);
		THFloatTensor *finput_t = THFloatTensor_newSelect(finput, 0, t);

		nn_SpatialConvolutionMM_updateOutput_frame(input_t, output_t, weight, bias, finput_t,
			kW, kH, dW, dH, padW, padH,
			nInputPlane, inputWidth, inputHeight,
			nOutputPlane, outputWidth, outputHeight);

		THFloatTensor_free(input_t);
		THFloatTensor_free(output_t);
		THFloatTensor_free(finput_t);
	}

	if (batch == 0) {
		/* Undo the temporary batch dimension on both tensors.
		 * NOTE(review): the input is restored using the module's nInputPlane,
		 * not the plane count it arrived with -- confirm they always match. */
		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		THFloatTensor_resize3d(input, nInputPlane, inputHeight, inputWidth);
	}

	return output;
}
Ejemplo n.º 2
0
// Frame grabber (Lua binding).
// Lua arguments: (1) capture index, (2) a torch.FloatTensor to fill.
// Queries one frame from capture[idx], resizes the tensor to
// 3 x height x width and stores the three channels scaled by 1/255.
// Pushes no Lua return values. If no frame could be queried, an error is
// printed and the tensor is left untouched.
static int l_grabFrame (lua_State *L) {
  // Get Tensor's Info
  // NOTE(review): idx is not range-checked against the capture/frame arrays,
  // whose sizes are not visible here -- confirm callers pass a valid index.
  const int idx = lua_tonumber(L, 1);
  THFloatTensor * tensor = luaT_checkudata(L, 2, luaT_checktypename2id(L, "torch.FloatTensor"));

  // grab frame
  frame[idx] = cvQueryFrame ( capture[idx] );
  if( !frame[idx] ) {
    perror("could not query OpenCV capture");
    // No frame to copy: bail out instead of dereferencing a NULL pointer below.
    return 0;
  }

  // Cache the image geometry used throughout the copy.
  const int height = frame[idx]->height;
  const int width = frame[idx]->width;
  const int widthStep = frame[idx]->widthStep;
  const int nChannels = frame[idx]->nChannels;

  // resize given tensor
  THFloatTensor_resize3d(tensor, 3, height, width);

  // copy to tensor
  // Strides are kept as long (the tensor's own type) to avoid truncation.
  const long rowStride = tensor->stride[1];
  const long colStride = tensor->stride[2];
  const long chanStride = tensor->stride[0];
  // Read pixels as unsigned bytes so values above 127 are not sign-extended.
  const unsigned char *src = (const unsigned char *)frame[idx]->imageData;
  float *dst = THFloatTensor_data(tensor);
  int i, j;
  long k;
  for (i=0; i < height; i++) {
    const unsigned char *row = src + (long)i*widthStep;
    for (j=0, k=0; j < width; j++, k+=colStride) {
      const unsigned char *px = row + (long)j*nChannels;
      // The source stores the channels in reverse order (byte 2 is red).
      // red:
      dst[k] = px[2]/255.;
      // green:
      dst[k+chanStride] = px[1]/255.;
      // blue:
      dst[k+2*chanStride] = px[0]/255.;
    }
    dst += rowStride;
  }

  return 0;
}
Ejemplo n.º 3
0
/*
 * Forward pass of the direct (unpadded) spatial convolution.
 *
 * The kernel size and the number of output planes are taken from the weight
 * tensor; the strides come from the module. Every output plane is first
 * filled with its bias value, then THFloatTensor_conv2Dmv (3D input) or
 * THFloatTensor_conv2Dmm (any other input rank) is called with
 * beta = alpha = 1.0 on top of that prefilled output.
 *
 * Returns module->output.
 */
THFloatTensor *nn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	THFloatTensor *kernels = module->SpatialConvolution.weight;
	THFloatTensor *biases  = module->SpatialConvolution.bias;
	THFloatTensor *result  = module->output;
	const int strideW = module->SpatialConvolution.dW;
	const int strideH = module->SpatialConvolution.dH;

	/* A 4D input carries a leading batch axis, shifting height/width by one. */
	const int axisShift = (input->nDimension == 4) ? 1 : 0;
	const int hAxis = 1 + axisShift;
	const int wAxis = 2 + axisShift;

	const long planes = kernels->size[0];
	const long kernW  = kernels->size[3];
	const long kernH  = kernels->size[2];
	const long outW   = (input->size[wAxis] - kernW) / strideW + 1;
	const long outH   = (input->size[hAxis] - kernH) / strideH + 1;
	/* Number of floats in one output plane. */
	const long planeLen = outW * outH;

	float *biasVals;
	float *outVals;

	if (input->nDimension == 3)
	{
		long plane;

		THFloatTensor_resize3d(result, planes, outH, outW);
		biasVals = THFloatTensor_data(biases);
		outVals = THFloatTensor_data(result);

		/* Prefill: one bias value per output plane. */
#pragma omp parallel for private(plane)
		for (plane = 0; plane < biases->size[0]; plane++)
		{
			float *dst = outVals + plane * planeLen;
			long n;
			for (n = 0; n < planeLen; n++)
				dst[n] = biasVals[plane];
		}

		THFloatTensor_conv2Dmv(result, 1.0, 1.0, input, kernels, strideH, strideW, "V","X");
		return result;
	}

	/* Batched path: the same prefill repeated for every sample. */
	{
		long sample;

		THFloatTensor_resize4d(result, input->size[0], planes, outH, outW);
		biasVals = THFloatTensor_data(biases);
		outVals = THFloatTensor_data(result);

#pragma omp parallel for private(sample)
		for (sample = 0; sample < input->size[0]; sample++)
		{
			float *sampleBase = outVals + sample * planes * planeLen;
			long plane;
			for (plane = 0; plane < biases->size[0]; plane++)
			{
				float *dst = sampleBase + plane * planeLen;
				long n;
				for (n = 0; n < planeLen; n++)
					dst[n] = biasVals[plane];
			}
		}

		/* do convolutions */
		THFloatTensor_conv2Dmm(result, 1.0, 1.0, input, kernels, strideH, strideW, "V","X");
	}
	return result;
}
Ejemplo n.º 4
0
/*
 * Strided 2D valid cross-correlation of a 3D input with a 4D kernel bank:
 *
 *     r_ = beta * r_ + (contributions computed with alpha)
 *
 * r_:    output, resized to kernel->size[0] x outRows x outCols. Its previous
 *        contents are kept (scaled by beta) only when beta != 0 and it
 *        already held a non-zero element count equal to the new one;
 *        otherwise it is zeroed first.
 * t_:    input, planes x rows x cols.
 * k_:    kernels, outPlanes x inPlanes x kRows x kCols.
 * srow, scol: row / column strides, both must be >= 1.
 * vf, xc: only "V" and "X" are accepted.
 *
 * Any violated precondition is reported through THError.
 */
void THFloatTensor_conv2Dmv(THFloatTensor *r_, float beta, float alpha, THFloatTensor *t_, THFloatTensor *k_, long srow, long scol, const char *vf, const char *xc)
{
	/* Argument validation, in the same order as the messages below. */
	if (t_->nDimension != 3)
		THError("input: 3D Tensor expected");
	if (k_->nDimension != 4)
		THError("kernel: 4D Tensor expected");
	if (srow < 1)
		THError("Stride should be a positive integer");
	if (scol < 1)
		THError("Stride should be a positive integer");
	if (!(*vf == 'V' && *xc == 'X'))
		THError("Type of convolution can be 'V','X' only");

	/* Input geometry. */
	const long inPlanes      = t_->size[0];
	const long inPlaneStride = t_->stride[0];
	const long inRows        = t_->size[1];
	const long inCols        = t_->size[2];

	/* Kernel geometry. */
	const long kOutStride = k_->stride[0];
	const long kInStride  = k_->stride[1];
	const long kRows      = k_->size[2];
	const long kCols      = k_->size[3];
	const long outPlanes  = k_->size[0];

	if (k_->size[1] != inPlanes)
		THError("invalid number of input planes");
	if (inRows < kRows || inCols < kCols)
		THError("conv2Dmv : Input image is smaller than kernel");

	const long outRows  = (inRows - kRows) / srow + 1;
	const long outCols  = (inCols - kCols) / scol + 1;
	/* Number of floats in one output plane. */
	const long planeLen = outCols * outRows;

	/* Element count before resizing decides whether old contents are reusable. */
	const long oldCount = THFloatTensor_nElement(r_);
	THFloatTensor_resize3d(r_, outPlanes, outRows, outCols);

	float *inData  = THFloatTensor_data(t_);
	float *kData   = THFloatTensor_data(k_);
	float *outData = THFloatTensor_data(r_);
	long plane;

	if (oldCount == 0 || beta == 0 || oldCount != THFloatTensor_nElement(r_))
	{
		/* Nothing worth keeping in r_: start from zero. */
#pragma omp parallel for private(plane)
		for (plane = 0; plane < r_->size[0]; plane++)
		{
			float *dst = outData + plane * planeLen;
			long n;
			for (n = 0; n < planeLen; n++)
				dst[n] = 0.0;
		}
	}
	else if (beta != 1)
	{
		/* Keep the previous contents, scaled by beta. */
#pragma omp parallel for private(plane)
		for (plane = 0; plane < r_->size[0]; plane++)
		{
			float *dst = outData + plane * planeLen;
			long n;
			for (n = 0; n < planeLen; n++)
				dst[n] *= beta;
		}
	}

	/* One output plane per iteration; each is fed by every input plane. */
#pragma omp parallel for private(plane)
	for (plane = 0; plane < outPlanes; plane++)
	{
		float *dst = outData + plane * planeLen;
		long src;
		for (src = 0; src < inPlanes; src++)
		{
			float *kernelPtr = kData + plane * kOutStride + src * kInStride;
			float *imagePtr  = inData + src * inPlaneStride;

			THFloatTensor_validXCorr2Dptr(dst,
				alpha,
				imagePtr,  inRows, inCols,
				kernelPtr, kRows,  kCols,
				srow, scol);
		}
	}
}