THFloatTensor *nn_SpatialConvolutionMM_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;

	THFloatTensor *finput = module->SpatialConvolution.finput;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias   = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	int batch = 1;
	if (input->nDimension == 3) {
		batch = 0;
		THFloatTensor_resize4d(input, 1, input->size[0], input->size[1], input->size[2]);
	}

	long batchSize = input->size[0];
	long nInputPlane  = module->SpatialConvolution.nInputPlane;
	long nOutputPlane = module->SpatialConvolution.nOutputPlane;
	long inputWidth   = input->size[3];
	long inputHeight  = input->size[2];
	long outputWidth  = (inputWidth + 2*padW - kW) / dW + 1;
	long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;


	if (outputWidth < 1 || outputHeight < 1)
		THError("Given input size: (%dx%dx%d). Calculated output size: (%dx%dx%d). Output size is too small",
		nInputPlane,inputHeight,inputWidth,nOutputPlane,outputHeight,outputWidth);

	THFloatTensor_resize3d(finput, batchSize, kW*kH*nInputPlane, outputHeight*outputWidth);
	THFloatTensor_resize4d(output, batchSize, nOutputPlane, outputHeight, outputWidth);

	long t;
#pragma omp parallel for if(batchSize >= 4) private(t)
	for (t = 0; t < batchSize; t++) {
		THFloatTensor *input_t = THFloatTensor_newSelect(input, 0, t);
		THFloatTensor *output_t = THFloatTensor_newSelect(output, 0, t);
		THFloatTensor *finput_t = THFloatTensor_newSelect(finput, 0, t);

		nn_SpatialConvolutionMM_updateOutput_frame(input_t, output_t, weight, bias, finput_t,
			kW, kH, dW, dH, padW, padH,
			nInputPlane, inputWidth, inputHeight,
			nOutputPlane, outputWidth, outputHeight);

		THFloatTensor_free(input_t);
		THFloatTensor_free(output_t);
		THFloatTensor_free(finput_t);
	}

	if (batch == 0) {
		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		THFloatTensor_resize3d(input, nInputPlane, inputHeight, inputWidth);
	}

	return output;
}
Beispiel #2
0
int THUseSpatialConvolutionMM(THNETWORK *network, int mm_type)
{
	int i;
	int rc = 0;

	for(i = 0; i < network->net->nelem; i++)
	{
		if(mm_type && network->net->modules[i].type == MT_SpatialConvolution)
		{
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			network->net->modules[i].type = MT_SpatialConvolutionMM;
			network->net->modules[i].updateOutput = nn_SpatialConvolutionMM_updateOutput;
			THFloatTensor_resize2d(c->weight, c->nOutputPlane, c->nInputPlane * c->kH * c->kW);
		} else if(!mm_type && (network->net->modules[i].type == MT_SpatialConvolutionMM ||
			network->net->modules[i].type == MT_SpatialConvolutionVirtMM))
		{
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			if(c->padW || c->padH)
			{
				rc = ERR_NOTIMPLEMENTED;
				continue;
			}
			network->net->modules[i].type = MT_SpatialConvolution;
			network->net->modules[i].updateOutput = nn_SpatialConvolution_updateOutput;
			THFloatTensor_resize4d(c->weight, c->nOutputPlane, c->nInputPlane, c->kH, c->kW);
		}
#ifndef USEBLAS
		if(mm_type == 2 && network->net->modules[i].type == MT_SpatialConvolutionMM)
			network->net->modules[i].type = MT_SpatialConvolutionVirtMM;
		else if(mm_type == 1 && network->net->modules[i].type == MT_SpatialConvolutionVirtMM)
			network->net->modules[i].type = MT_SpatialConvolutionMM;
#endif
	}
	return rc;
}
Beispiel #3
0
THFloatTensor *nn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;

	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	int dimw = 2;
	int dimh = 1;

	if (input->nDimension == 4)
	{
		dimw++;
		dimh++;
	}
	
	long nOutputPlane = weight->size[0];
	long kW           = weight->size[3];
	long kH           = weight->size[2];
	long inputWidth   = input->size[dimw];
	long inputHeight  = input->size[dimh];
	long outputWidth  = (inputWidth - kW) / dW + 1;
	long outputHeight = (inputHeight - kH) / dH + 1;

	if (input->nDimension == 3)
	{
		long i;
		float *bias_data;
		float *output_data;

		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		/* add bias */
		bias_data = THFloatTensor_data(bias);
		output_data = THFloatTensor_data(output);

#pragma omp parallel for private(i)
		for (i=0; i<bias->size[0]; i++)
		{
			float *ptr_output = output_data + i*outputWidth*outputHeight;
			long j;
			for(j = 0; j < outputWidth*outputHeight; j++)
				ptr_output[j] = bias_data[i];
		}
		THFloatTensor_conv2Dmv(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
	}
	else
	{
		float *bias_data;
		float *output_data; 
		long p;

		THFloatTensor_resize4d(output, input->size[0], nOutputPlane, outputHeight, outputWidth);

		bias_data = THFloatTensor_data(bias);
		output_data = THFloatTensor_data(output);

#pragma omp parallel for private(p)
		for (p=0; p<input->size[0]; p++)
		{
			/* BIAS */
			long i;
			for (i=0; i<bias->size[0]; i++)
			{
				float *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
				long j;
				for(j = 0; j < outputWidth*outputHeight; j++)
					ptr_output[j] = bias_data[i];
			}
		}

		/* do convolutions */
		THFloatTensor_conv2Dmm(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
	}
	return output;
}
Beispiel #4
0
void THFloatTensor_conv2Dmm(THFloatTensor *r_, float beta, float alpha, THFloatTensor *t_, THFloatTensor *k_, long srow, long scol, const char *vf, const char *xc)
{
	long nInputPlane, nInputRows, nInputCols;
	long nKernelRows, nKernelCols;
	long nOutputPlane, nOutputRows, nOutputCols;
	long kstride0, kstride1;
	THFloatTensor *input;
	THFloatTensor* kernel;
	long nbatch;
	long nelem;
	float *input_data;
	float *weight_data;
	float *output_data;
	long p;

	if(t_->nDimension != 4)
		THError("input: 3D Tensor expected");
	if(k_->nDimension != 4)
		THError("kernel: 4D Tensor expected");
	if(srow < 1)
		THError("Stride should be a positive integer");
	if(scol < 1)
		THError("Stride should be a positive integer");
	if(*vf != 'V' || *xc != 'X')
		THError("Type of convolution can be 'V','X' only");

	input = t_;
	kernel = k_;

	nbatch = input->size[0];
	nInputPlane = input->size[1];
	nInputRows  = input->size[2];
	nInputCols  = input->size[3];

	kstride0    = kernel->stride[0];
	kstride1    = kernel->stride[1];
	nKernelRows = kernel->size[2];
	nKernelCols = kernel->size[3];
	nOutputPlane = kernel->size[0];
	if(kernel->size[1] != nInputPlane)
		THError("invalid number of input planes");

	if(!(nInputRows >= nKernelRows && nInputCols >= nKernelCols))
		THError("conv2Dmv : Input image is smaller than kernel");

	nOutputRows = (nInputRows - nKernelRows) / srow + 1;
	nOutputCols = (nInputCols - nKernelCols) / scol + 1;

	nelem = THFloatTensor_nElement(r_);
	THFloatTensor_resize4d(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);

	input_data = THFloatTensor_data(input);
	weight_data = THFloatTensor_data(kernel);
	output_data = THFloatTensor_data(r_);

	if (nelem == 0 || beta == 0 || nelem != THFloatTensor_nElement(r_))
	{
		/*THFloatTensor_(zero)(r_);*/
#pragma omp parallel for private(p)
		for (p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] = 0.0;
			}
		}
	}
	else if (beta != 1)
	{
		/*THFloatTensor_(mul)(r_, beta);*/
#pragma omp parallel for private(p)
		for(p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] *= beta;
			}
		}
	}

#pragma omp parallel for private(p)
	for(p=0; p < nbatch; p++)
	{
		long k;
		for(k = 0; k < nOutputPlane; k++)
		{
			long i;
			/* get output */
			float *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;
			for(i = 0; i < nInputPlane; i++)
			{
				/* get kernel */
				float *ptr_weight = weight_data + k*kstride0 + i*kstride1;
				/* get input */
				float *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;

				/* do image, kernel convolution */
				THFloatTensor_validXCorr2Dptr(ptr_output,
					alpha,
					ptr_input,  nInputRows,  nInputCols,
					ptr_weight, nKernelRows, nKernelCols,
					srow, scol);
			}
		}
	}
}