Example No. 1
int THProcessImages(THNETWORK *network, unsigned char **images, int batchsize, int width, int height, int stride, float **results, int *outwidth, int *outheight, int bgr)
{
	int i;
	THFloatTensor *out, *t = 0;
	THFloatStorage *st;
	
#ifdef CUDNN
	if(network->net->engine == ENGINE_CUDA)
	{
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
		{
			st = THCudaStorage_new(batchsize * (width * height * 3));
			for(i = 0; i < batchsize; i++)
				cuda_rgb2half((unsigned short *)st->data + i * (width * height * 3), images[i], width, height, stride, network->mean, network->std, bgr);
		} else
#endif
		{
			st = THCudaStorage_new(batchsize * width * height * 3);
			for(i = 0; i < batchsize; i++)
				cuda_rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std, bgr);
		}
	} else
#endif
#ifdef OPENCL
	if(network->net->engine == ENGINE_OPENCL || network->net->engine == ENGINE_OPENCLINIT)
		t = OpenCL_LoadImage(images[0], width, height, stride, network->mean, network->std, bgr);
	else
#endif
#ifdef LOWP
	if(network->net->engine == ENGINE_LOWP)
		t = Lowp_LoadImages(images, batchsize, width, height, stride, network->mean, network->std, bgr);
	else
#endif
	{
		/* CPU fallback: convert each packed 8-bit image to normalized planar float. */
		st = THFloatStorage_new(batchsize * width * height * 3);
		if(bgr)
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				bgr2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
		else
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
	}
	if(!t)
	{
		/* Wrap the freshly filled storage in a CHW (single image) or NCHW (batch) tensor header. */
		t = THFloatTensor_new();
		t->storage = st;
		if(batchsize == 1)
		{
			t->nDimension = 3;
			t->size[0] = 3;
			t->size[1] = height;
			t->size[2] = width;
			t->stride[0] = width * height;
			t->stride[1] = width;
			t->stride[2] = 1;
		} else {
			t->nDimension = 4;
			t->size[0] = batchsize;
			t->size[1] = 3;
			t->size[2] = height;
			t->size[3] = width;
			t->stride[0] = 3 * width * height;
			t->stride[1] = width * height;
			t->stride[2] = width;
			t->stride[3] = 1;
		}
	}
#ifdef CUDNN
	if(network->net->engine == ENGINE_CUDA)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
			network->out = THFloatTensor_newFromHalfCudaTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->engine == ENGINE_OPENCL || network->net->engine == ENGINE_OPENCLINIT)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(cl_datasize == 2)
			network->out = THFloatTensor_newFromHalfOpenCLImageTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
#ifdef LOWP
	if(network->net->engine == ENGINE_LOWP)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromLowpTensor(out);
		out = network->out;
	} else
#endif
		out = forward(network->net, t);
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
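A minimal caller sketch for the entry point above. It assumes the usual thnets public API (THInit, THLoadNetwork, THFreeNetwork and a thnets.h header) and an 8-bit packed RGB buffer whose row stride is 3 * width bytes; the model path and image size are placeholders, not taken from the example.

#include <stdio.h>
#include <stdlib.h>
#include "thnets.h"	/* assumed public header name */

int main(void)
{
	THInit();	/* assumed thnets initialization call */
	THNETWORK *net = THLoadNetwork("model_dir");	/* placeholder model directory */
	if(!net)
		return 1;

	int width = 224, height = 224, stride = 3 * width;	/* packed RGB, stride in bytes per row */
	unsigned char *img = calloc((size_t)stride * height, 1);
	unsigned char *batch[1] = { img };

	float *results;
	int outwidth, outheight, n;
	n = THProcessImages(net, batch, 1, width, height, stride, &results, &outwidth, &outheight, 0);
	printf("%d output values (%dx%d spatial)\n", n, outwidth, outheight);

	free(img);
	THFreeNetwork(net);
	return 0;
}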
Example No. 2
int THProcessFloat(THNETWORK *network, float *data, int batchsize, int width, int height, float **result, int *outwidth, int *outheight)
{
	int b, c, i;
	THFloatTensor *t = THFloatTensor_new();
	THFloatTensor *out;
	/* Build a 4D NCHW view directly over the caller's buffer (no copy is made). */
	t->nDimension = 4;
	t->size[0] = batchsize;
	t->size[1] = 3;
	t->size[2] = height;
	t->size[3] = width;
	t->stride[0] = 3 * width * height;
	t->stride[1] = width * height;
	t->stride[2] = width;
	t->stride[3] = 1;
	t->storage = THFloatStorage_newwithbuffer((float *)data);
	/* Normalize in place: subtract the per-channel mean and divide by the per-channel std. */
#pragma omp parallel for private(b, c, i)
	for(b = 0; b < batchsize; b++)
		for(c = 0; c < 3; c++)
			for(i = 0; i < width*height; i++)
				data[b * t->stride[0] + c * t->stride[1] + i] =
					(data[b * t->stride[0] + c * t->stride[1] + i] - network->mean[c]) / network->std[c];
#ifdef CUDNN
	if(network->net->engine == ENGINE_CUDA)
	{
		THFloatTensor *t2 = THCudaTensor_newFromFloatTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->engine == ENGINE_OPENCL || network->net->engine == ENGINE_OPENCLINIT)
	{
		THFloatTensor *t2 = THOpenCLTensor_newFromImageTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
#ifdef LOWP
	if(network->net->engine == ENGINE_LOWP)
	{
		THFloatTensor *t2 = THLowpTensor_newFromFloatTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromLowpTensor(out);
		out = network->out;
	} else
#endif
	out = forward(network->net, t);
	THFloatTensor_free(t);
	*result = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
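Unlike the image variant, THProcessFloat wraps the caller's buffer directly (THFloatStorage_newwithbuffer) and normalizes it in place, so data must already be laid out as batchsize x 3 x height x width planar floats and will be modified. A small sketch of building that layout; the helper name make_nchw_buffer, the dimensions and the pixel value are made up for illustration.

#include <stdlib.h>

/* Element (b, c, y, x) of the planar NCHW buffer lives at ((b*3 + c)*H + y)*W + x. */
float *make_nchw_buffer(int B, int H, int W)
{
	int b, c, y, x;
	float *data = malloc(sizeof(float) * B * 3 * H * W);
	for(b = 0; b < B; b++)
		for(c = 0; c < 3; c++)
			for(y = 0; y < H; y++)
				for(x = 0; x < W; x++)
					data[((b * 3 + c) * H + y) * W + x] = 0.5f;	/* placeholder pixel value */
	return data;
}

After THProcessFloat returns, the buffer holds the normalized values rather than the original pixels, since the normalization above is performed in place.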
Example No. 3
void THFloatTensor_conv2Dmm(THFloatTensor *r_, float beta, float alpha, THFloatTensor *t_, THFloatTensor *k_, long srow, long scol, const char *vf, const char *xc)
{
	long nInputPlane, nInputRows, nInputCols;
	long nKernelRows, nKernelCols;
	long nOutputPlane, nOutputRows, nOutputCols;
	long kstride0, kstride1;
	THFloatTensor *input;
	THFloatTensor* kernel;
	long nbatch;
	long nelem;
	float *input_data;
	float *weight_data;
	float *output_data;
	long p;

	if(t_->nDimension != 4)
		THError("input: 4D Tensor expected");
	if(k_->nDimension != 4)
		THError("kernel: 4D Tensor expected");
	if(srow < 1)
		THError("Stride should be a positive integer");
	if(scol < 1)
		THError("Stride should be a positive integer");
	if(*vf != 'V' || *xc != 'X')
		THError("Type of convolution can be 'V','X' only");

	input = t_;
	kernel = k_;

	nbatch = input->size[0];
	nInputPlane = input->size[1];
	nInputRows  = input->size[2];
	nInputCols  = input->size[3];

	kstride0    = kernel->stride[0];
	kstride1    = kernel->stride[1];
	nKernelRows = kernel->size[2];
	nKernelCols = kernel->size[3];
	nOutputPlane = kernel->size[0];
	if(kernel->size[1] != nInputPlane)
		THError("invalid number of input planes");

	if(!(nInputRows >= nKernelRows && nInputCols >= nKernelCols))
		THError("conv2Dmm : Input image is smaller than kernel");

	nOutputRows = (nInputRows - nKernelRows) / srow + 1;
	nOutputCols = (nInputCols - nKernelCols) / scol + 1;

	nelem = THFloatTensor_nElement(r_);
	THFloatTensor_resize4d(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);

	input_data = THFloatTensor_data(input);
	weight_data = THFloatTensor_data(kernel);
	output_data = THFloatTensor_data(r_);

	if (nelem == 0 || beta == 0 || nelem != THFloatTensor_nElement(r_))
	{
		/* Output is new, resized, or beta == 0: start from a zeroed accumulator. */
		/*THFloatTensor_(zero)(r_);*/
#pragma omp parallel for private(p)
		for (p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] = 0.0;
			}
		}
	}
	else if (beta != 1)
	{
		/* Otherwise scale the existing output by beta before accumulating the convolution. */
		/*THFloatTensor_(mul)(r_, beta);*/
#pragma omp parallel for private(p)
		for(p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] *= beta;
			}
		}
	}

	/* For each batch sample and output plane, accumulate the valid cross-correlation over all input planes. */
#pragma omp parallel for private(p)
	for(p=0; p < nbatch; p++)
	{
		long k;
		for(k = 0; k < nOutputPlane; k++)
		{
			long i;
			/* get output */
			float *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;
			for(i = 0; i < nInputPlane; i++)
			{
				/* get kernel */
				float *ptr_weight = weight_data + k*kstride0 + i*kstride1;
				/* get input */
				float *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;

				/* do image, kernel convolution */
				THFloatTensor_validXCorr2Dptr(ptr_output,
					alpha,
					ptr_input,  nInputRows,  nInputCols,
					ptr_weight, nKernelRows, nKernelCols,
					srow, scol);
			}
		}
	}
}
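The output geometry follows the usual 'valid' cross-correlation rule used above: nOutputRows = (nInputRows - nKernelRows) / srow + 1, and likewise for columns. A quick worked check with made-up sizes:

/* Illustrative numbers only: 32x32 input plane, 5x5 kernel, stride 2 in each direction. */
long nInputRows = 32, nInputCols = 32;
long nKernelRows = 5, nKernelCols = 5;
long srow = 2, scol = 2;
long nOutputRows = (nInputRows - nKernelRows) / srow + 1;	/* (32 - 5) / 2 + 1 = 14 */
long nOutputCols = (nInputCols - nKernelCols) / scol + 1;	/* (32 - 5) / 2 + 1 = 14 */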
Example No. 4
void THFloatTensor_copy(THFloatTensor *tdst, THFloatTensor *tsrc)
{
	/* Raw element-wise copy; assumes both tensors are contiguous and the destination
	   storage holds at least THFloatTensor_nElement(tsrc) elements. */
	memcpy(tdst->storage->data, tsrc->storage->data, sizeof(*tdst->storage->data) * THFloatTensor_nElement(tsrc));
}
Example No. 5
void THFloatTensor_fill(THFloatTensor *t, float value)
{
	/* Set every element to the given value via the vectorized fill helper. */
	THFloatVector_fill(t->storage->data, value, THFloatTensor_nElement(t));
}
Example No. 6
void THFloatTensor_zero(THFloatTensor *t)
{
	/* Clear the whole contiguous backing storage of the tensor. */
	memset(t->storage->data, 0, THFloatTensor_nElement(t) * sizeof(*t->storage->data));
}
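The three small helpers above operate on the raw contiguous storage rather than walking strides. A short sketch exercising them together; it uses only constructors that appear in these examples, and assumes THFloatTensor_resize4d allocates the backing storage for a freshly created tensor.

/* Illustrative only: build two small contiguous tensors, then fill, zero and copy them. */
THFloatTensor *a = THFloatTensor_new();
THFloatTensor *b = THFloatTensor_new();
THFloatTensor_resize4d(a, 1, 3, 4, 4);	/* assumed to allocate the backing storage */
THFloatTensor_resize4d(b, 1, 3, 4, 4);
THFloatTensor_fill(a, 1.0f);	/* every element of a becomes 1.0 */
THFloatTensor_zero(b);		/* every element of b becomes 0.0 */
THFloatTensor_copy(b, a);	/* raw copy: both tensors must be contiguous and equally sized */
THFloatTensor_free(a);
THFloatTensor_free(b);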