Example #1
File: thapi.c Project: Aerobota/thnets
int THProcessYUYV(THNETWORK *network, unsigned char *image, int width, int height, float **results, int *outwidth, int *outheight)
{
	THFloatTensor *out;
	THFloatStorage *st;

#ifdef CUDNN
	if(network->net->cuda)
		THError("This function is not supported with CUDNN");
#endif
#ifdef OPENCL
	if(network->net->opencl)
		THError("This function is not supported with OpenCL");
#endif
	st = THFloatStorage_new(width * height * 3);
	yuyv2fRGB(image, st->data, width*height, width, width, height, network->mean, network->std);
	THFloatTensor *t = THFloatTensor_new();
	t->storage = st;
	t->nDimension = 3;
	t->size[0] = 3;
	t->size[1] = height;
	t->size[2] = width;
	t->stride[0] = width * height;
	t->stride[1] = width;
	t->stride[2] = 1;
	out = forward(network->net, t);
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
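
A minimal usage sketch for THProcessYUYV; the wrapper function, the frame buffer, and the assumption that the project's thnets.h declarations are in scope are not part of the example above:

/* Hedged sketch: run one packed YUYV frame through an already-loaded THNETWORK. */
static int classify_yuyv_frame(THNETWORK *net, unsigned char *yuyv, int w, int h)
{
	float *results;
	int outw, outh;
	int n = THProcessYUYV(net, yuyv, w, h, &results, &outw, &outh);
	/* `results` aliases the network's output storage: n floats in total,
	   with the last two dimensions being outh x outw for spatial outputs. */
	return n;
}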
Example #2
File: thapi.c Project: Aerobota/thnets
int THProcessFloat(THNETWORK *network, float *data, int batchsize, int width, int height, float **result, int *outwidth, int *outheight)
{
	int b, c, i;
	THFloatTensor *t = THFloatTensor_new();
	THFloatTensor *out;
	t->nDimension = 4;
	t->size[0] = batchsize;
	t->size[1] = 3;
	t->size[2] = height;
	t->size[3] = width;
	t->stride[0] = 3 * width * height;
	t->stride[1] = width * height;
	t->stride[2] = width;
	t->stride[3] = 1;
	t->storage = THFloatStorage_newwithbuffer((float *)data);
#pragma omp parallel for private(b, c, i)
	for(b = 0; b < batchsize; b++)
		for(c = 0; c < 3; c++)
			for(i = 0; i < width*height; i++)
				data[b * t->stride[0] + c * t->stride[1] + i] =
					(data[b * t->stride[0] + c * t->stride[1] + i] - network->mean[c]) / network->std[c];
#ifdef CUDNN
	if(network->net->cuda)
	{
		THFloatTensor *t2 = THCudaTensor_newFromFloatTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
	{
		THFloatTensor *t2 = THOpenCLTensor_newFromImageTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
	out = forward(network->net, t);
	THFloatTensor_free(t);
	*result = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
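
A short usage sketch for THProcessFloat; the helper below is an assumption, not part of the library. Note that the function normalizes `data` in place with the network's mean/std, so the caller's buffer is modified:

/* Hedged sketch: run a packed float batch (batchsize x 3 x height x width, CHW order). */
static int run_float_batch(THNETWORK *net, float *data, int batchsize, int w, int h)
{
	float *result;
	int outw, outh;
	return THProcessFloat(net, data, batchsize, w, h, &result, &outw, &outh);
}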
Example #3
File: thbasic.c Project: lijian8/thnets
THFloatTensor *THFloatTensor_newWithStorage1d(THFloatStorage *storage, long storageOffset, long size0, long stride0)
{
	THFloatTensor *t = THFloatTensor_new();
	t->nDimension = 1;
	t->size[0] = size0;
	t->stride[0] = stride0 == -1 ? 1 : stride0;
	t->storage = storage;
	t->storageOffset = storageOffset;
	THAtomicIncrement(&t->storage->nref);
	return t;
}
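
A short sketch of how this constructor might be used (the 100-element storage is only an illustration). A stride of -1 defaults to 1, i.e. a contiguous view, and the storage reference count is incremented so the view keeps the storage alive:

THFloatStorage *st = THFloatStorage_new(100);
THFloatTensor *v = THFloatTensor_newWithStorage1d(st, 0, 100, -1);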
Example #4
File: thapi.c Project: Aerobota/thnets
void THMakeSpatial(THNETWORK *network)
{
	int i, size = 231, nInputPlane = 3;
	
	for(i = 0; i < network->net->nelem; i++)
	{
		if(network->net->modules[i].type == MT_View || network->net->modules[i].type == MT_Reshape)
		{
			THFloatTensor_free(network->net->modules[i].output);
			memmove(network->net->modules+i, network->net->modules+i+1, sizeof(*network->net->modules) * (network->net->nelem - i - 1));
			network->net->nelem--;
			i--;
		} else if(network->net->modules[i].type == MT_Linear)
		{
			THFloatTensor_free(network->net->modules[i].Linear.addBuffer);
			network->net->modules[i].updateOutput = nn_SpatialConvolutionMM_updateOutput;
#ifndef USEBLAS
			network->net->modules[i].type = MT_SpatialConvolutionVirtMM;
#else
			network->net->modules[i].type = MT_SpatialConvolutionMM;
#endif
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			c->finput = THFloatTensor_new();
			c->padW = c->padH = 0;
			c->dW = c->dH = 1;
			c->kW = c->kH = size;
			c->nInputPlane = nInputPlane;
			nInputPlane = c->nOutputPlane = c->weight->size[0];
			size = (size + 2*c->padW - c->kW) / c->dW + 1;
		} else if(network->net->modules[i].type == MT_SpatialConvolution ||
			network->net->modules[i].type == MT_SpatialConvolutionMM ||
			network->net->modules[i].type == MT_SpatialConvolutionVirtMM)
		{
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			size = (size + 2*c->padW - c->kW) / c->dW + 1;
			nInputPlane = network->net->modules[i].SpatialConvolution.nOutputPlane;
		} else if(network->net->modules[i].type == MT_SpatialMaxPooling)
		{
			struct SpatialMaxPooling *c = &network->net->modules[i].SpatialMaxPooling;
			if(c->ceil_mode)
				size = (long)(ceil((float)(size - c->kH + 2*c->padH) / c->dH)) + 1;
			else size = (long)(floor((float)(size - c->kH + 2*c->padH) / c->dH)) + 1;
		} else if(network->net->modules[i].type == MT_SpatialZeroPadding)
		{
			struct SpatialZeroPadding *c = &network->net->modules[i].SpatialZeroPadding;
			size += c->pad_l + c->pad_r;
		}
	}
}
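
A worked illustration of the size bookkeeping in the loop above, using assumed layer parameters (an 11x11, stride-4 convolution; these numbers are not taken from the example):

int size = 231;                        /* initial spatial side, as above     */
int kW = 11, dW = 4, padW = 0;         /* assumed first-convolution geometry */
size = (size + 2*padW - kW) / dW + 1;  /* (231 - 11) / 4 + 1 = 56            */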
Example #5
File: thbasic.c Project: lijian8/thnets
THFloatTensor *THFloatTensor_newWithStorage3d(THFloatStorage *storage, long storageOffset, long size0, long stride0, long size1, long stride1, long size2, long stride2)
{
	THFloatTensor *t = THFloatTensor_new();
	t->nDimension = 3;
	t->size[0] = size0;
	t->size[1] = size1;
	t->size[2] = size2;
	t->stride[0] = stride0 == -1 ? size1 * size2 : stride0;
	t->stride[1] = stride1 == -1 ? size2 : stride1;
	t->stride[2] = stride2 == -1 ? 1 : stride2;
	t->storage = storage;
	t->storageOffset = storageOffset;
	THAtomicIncrement(&t->storage->nref);
	return t;
}
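
A short sketch: the manual field-filling in example #1 could be expressed with this constructor instead, letting -1 strides default to the contiguous layout (width and height are assumed to be in scope):

THFloatStorage *st = THFloatStorage_new(3 * height * width);
THFloatTensor *t = THFloatTensor_newWithStorage3d(st, 0, 3, -1, height, -1, width, -1);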
Example #6
File: thapi.c Project: Aerobota/thnets
THFloatTensor *forward(struct network *net, THFloatTensor *in)
{
	int i;
	double t = 0, convtot = 0, convflops = 0;
	
#ifdef OPENCL
	if(net->opencl == 1)
		OpenCL_Build(net, in);
#endif
	for(i = 0; i < net->nelem; i++)
	{
		if(th_profile)
			t = th_seconds();
		in = net->modules[i].updateOutput(&net->modules[i], in);
		// You can remove these lines if you don't have problems with memory
		// These lines free intermediate results
		if(i > 0)
		{
			THFloatTensor_free(net->modules[i-1].output);
			net->modules[i-1].output = THFloatTensor_new();
		}
		if(th_profile)
		{
#ifdef OPENCL
			if(net->opencl)
				clFinish(cl_queue);
#endif
			t = th_seconds() - t;
			if(net->modules[i].type == MT_SpatialConvolutionMM ||
				net->modules[i].type == MT_SpatialConvolutionVirtMM ||
				net->modules[i].type == MT_SpatialConvolution)
			{
				double flops = 2.0 * THFloatTensor_nElement(in) * net->modules[i].SpatialConvolution.nInputPlane *
					net->modules[i].SpatialConvolution.kW * net->modules[i].SpatialConvolution.kH;
				printf("%f seconds for module %d, %f Gflops/s\n", t, i+1, flops * 1e-9 / t);
				convtot += t;
				convflops += flops;
			} else printf("%f seconds for module %d\n", t, i+1);
		}
		if(th_debug > 1)
			printf("%d) %d %d %ld %ld %ld %ld\n", i+1, net->modules[i].type, in->nDimension, in->size[0], in->size[1], in->size[2], in->size[3]);
	}
	if(th_profile)
		printf("%f seconds for convolutions %f Gflops/s\n", convtot, convflops * 1e-9 / convtot);
	return in;
}
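
A small helper restating the profiling arithmetic used above; this is a sketch for illustration, not part of the library:

static double conv_gflops_per_s(long out_elems, int nInputPlane, int kW, int kH, double seconds)
{
	/* 2 * output elements * input planes * kernel area, as in forward() */
	double flops = 2.0 * (double)out_elems * nInputPlane * kW * kH;
	return flops * 1e-9 / seconds;
}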
Example #7
File: thapi.c Project: Aerobota/thnets
int THProcessImages(THNETWORK *network, unsigned char **images, int batchsize, int width, int height, int stride, float **results, int *outwidth, int *outheight, int bgr)
{
	int i;
	THFloatTensor *out, *t = 0;
	THFloatStorage *st;
	
#ifdef CUDNN
	if(network->net->cuda)
	{
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
		{
			st = THCudaStorage_new(batchsize * (width * height * 3));
			for(i = 0; i < batchsize; i++)
				cuda_rgb2half((unsigned short *)st->data + i * (width * height * 3), images[i], width, height, stride, network->mean, network->std, bgr);
		} else
#endif
		{
			st = THCudaStorage_new(batchsize * width * height * 3);
			for(i = 0; i < batchsize; i++)
				cuda_rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std, bgr);
		}
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
		t = OpenCL_LoadImage(images[0], width, height, stride, network->mean, network->std, bgr);
	else
#endif
	{
		st = THFloatStorage_new(batchsize * width * height * 3);
		if(bgr)
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				bgr2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
		else
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
	}
	if(!t)
	{
		t = THFloatTensor_new();
		t->storage = st;
		if(batchsize == 1)
		{
			t->nDimension = 3;
			t->size[0] = 3;
			t->size[1] = height;
			t->size[2] = width;
			t->stride[0] = width * height;
			t->stride[1] = width;
			t->stride[2] = 1;
		} else {
			t->nDimension = 4;
			t->size[0] = batchsize;
			t->size[1] = 3;
			t->size[2] = height;
			t->size[3] = width;
			t->stride[0] = 3 * width * height;
			t->stride[1] = width * height;
			t->stride[2] = width;
			t->stride[3] = 1;
		}
	}
#ifdef CUDNN
	if(network->net->cuda)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
			network->out = THFloatTensor_newFromHalfCudaTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(cl_datasize == 2)
			network->out = THFloatTensor_newFromHalfOpenCLImageTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
		out = forward(network->net, t);
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else *outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
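
A minimal usage sketch for a single tightly packed RGB image; the wrapper, the assumption that the row stride is 3*width bytes, and the already-loaded `net` are not part of the example above:

static int run_rgb_image(THNETWORK *net, unsigned char *rgb, int w, int h)
{
	float *results;
	int outw, outh;
	unsigned char *imgs[1] = { rgb };
	/* batchsize 1, tightly packed rows, bgr flag 0 for RGB input */
	return THProcessImages(net, imgs, 1, w, h, 3 * w, &results, &outw, &outh, 0);
}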
Example #8
File: thbasic.c Project: lijian8/thnets
THFloatTensor *THFloatTensor_newWithTensor(THFloatTensor *tensor)
{
	THFloatTensor *self = THFloatTensor_new();
	THFloatTensor_set(self, tensor);
	return self;
}
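
A one-line sketch, assuming `t` is an existing THFloatTensor: the new tensor shares the original's storage, so it behaves as a view of the same data.

THFloatTensor *view = THFloatTensor_newWithTensor(t);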