/* Runs the network on a single YUYV (YUV 4:2:2 packed) image.
 *
 * The image is converted to normalized planar float RGB ((pixel - mean) / std
 * per channel) into a freshly allocated storage, wrapped as a 3 x height x width
 * tensor, and passed through the network.
 *
 * network   - loaded network (CPU backend only; errors out on CUDNN/OpenCL)
 * image     - packed YUYV pixels, 2 bytes per pixel
 * width,
 * height    - image dimensions in pixels
 * results   - out: pointer into the network output storage (owned by the
 *             network/output tensor, not the caller)
 * outwidth,
 * outheight - out: spatial size of the output (1x1 for non-spatial outputs)
 *
 * Returns the total number of elements in the output tensor.
 */
int THProcessYUYV(THNETWORK *network, unsigned char *image, int width, int height, float **results, int *outwidth, int *outheight)
{
	THFloatTensor *out;
	THFloatStorage *st;

#ifdef CUDNN
	if(network->net->cuda)
		THError("This function is not supported with CUDNN");
#endif
#ifdef OPENCL
	/* Fixed: this guard previously tested network->net->cuda (copy-paste from
	   the CUDNN branch); the OpenCL backend is flagged by the opencl field,
	   as in THProcessFloat and THProcessImages. */
	if(network->net->opencl)
		THError("This function is not supported with OpenCL");
#endif
	st = THFloatStorage_new(width * height * 3);
	/* YUYV -> normalized planar float RGB directly into the new storage. */
	yuyv2fRGB(image, st->data, width*height, width, width, height, network->mean, network->std);
	/* Describe the buffer as a contiguous 3 x height x width tensor. */
	THFloatTensor *t = THFloatTensor_new();
	t->storage = st;
	t->nDimension = 3;
	t->size[0] = 3;
	t->size[1] = height;
	t->size[2] = width;
	t->stride[0] = width * height;
	t->stride[1] = width;
	t->stride[2] = 1;
	out = forward(network->net, t);
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		/* Last two dimensions are the spatial ones. */
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else
		*outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
/* Runs the network forward on a caller-supplied planar-RGB float buffer.
 *
 * network   - loaded network
 * data      - batchsize x 3 x height x width contiguous float buffer,
 *             owned by the caller
 * batchsize - number of images in the buffer
 * width,
 * height    - per-image spatial dimensions
 * result    - out: pointer into the output tensor's storage (not caller-owned)
 * outwidth,
 * outheight - out: spatial size of the output (1x1 for non-spatial outputs)
 *
 * Returns the total number of elements in the output tensor.
 *
 * NOTE(review): mean/std normalization is applied IN PLACE, so the caller's
 * buffer is modified; calling this twice on the same buffer normalizes twice.
 */
int THProcessFloat(THNETWORK *network, float *data, int batchsize, int width, int height, float **result, int *outwidth, int *outheight)
{
	int b, c, i;
	THFloatTensor *t = THFloatTensor_new();
	THFloatTensor *out;

	/* Describe data as a contiguous batchsize x 3 x height x width tensor. */
	t->nDimension = 4;
	t->size[0] = batchsize;
	t->size[1] = 3;
	t->size[2] = height;
	t->size[3] = width;
	t->stride[0] = 3 * width * height;
	t->stride[1] = width * height;
	t->stride[2] = width;
	t->stride[3] = 1;
	/* Wrap the caller's buffer without copying; the caller keeps ownership. */
	t->storage = THFloatStorage_newwithbuffer((float *)data);
	/* Per-channel (x - mean) / std normalization, parallelized over the batch. */
#pragma omp parallel for private(b, c, i)
	for(b = 0; b < batchsize; b++)
		for(c = 0; c < 3; c++)
			for(i = 0; i < width*height; i++)
				data[b * t->stride[0] + c * t->stride[1] + i] = (data[b * t->stride[0] + c * t->stride[1] + i] - network->mean[c]) / network->std[c];
#ifdef CUDNN
	if(network->net->cuda)
	{
		/* Upload, run on the GPU, then copy the result back to a host tensor
		   cached in network->out (freed on the next call). */
		THFloatTensor *t2 = THCudaTensor_newFromFloatTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
	{
		/* Same dance for the OpenCL image-tensor backend. */
		THFloatTensor *t2 = THOpenCLTensor_newFromImageTensor(t);
		out = forward(network->net, t2);
		THFloatTensor_free(t2);
		if(network->out)
			THFloatTensor_free(network->out);
		network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
	out = forward(network->net, t);	/* plain CPU path */
	THFloatTensor_free(t);
	*result = out->storage->data;
	if(out->nDimension >= 3)
	{
		/* Last two dimensions are the spatial ones. */
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else
		*outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
/* Creates a 1-d tensor viewing an existing storage (no data copy).
 * A stride0 of -1 selects the default contiguous stride of 1.
 * The storage's reference count is incremented; the returned tensor
 * shares ownership of it. */
THFloatTensor *THFloatTensor_newWithStorage1d(THFloatStorage *storage, long storageOffset, long size0, long stride0)
{
	THFloatTensor *tensor = THFloatTensor_new();
	tensor->storage = storage;
	tensor->storageOffset = storageOffset;
	tensor->nDimension = 1;
	tensor->size[0] = size0;
	if(stride0 == -1)
		tensor->stride[0] = 1;
	else
		tensor->stride[0] = stride0;
	THAtomicIncrement(&tensor->storage->nref);
	return tensor;
}
/* Converts a classification network into a fully-convolutional ("spatial") one:
 * View/Reshape modules are removed and Linear modules are rewritten as
 * convolutions whose kernel covers the whole remaining spatial extent,
 * tracking the running spatial size and plane count through the layers.
 *
 * NOTE(review): the starting size of 231 is hard-coded — presumably the
 * network's expected input resolution; confirm against the loaders.
 * NOTE(review): pooling size tracking uses only kH/dH/padH, i.e. it assumes
 * square pooling windows — verify for non-square models.
 */
void THMakeSpatial(THNETWORK *network)
{
	int i, size = 231, nInputPlane = 3;

	for(i = 0; i < network->net->nelem; i++)
	{
		if(network->net->modules[i].type == MT_View || network->net->modules[i].type == MT_Reshape)
		{
			/* Drop the module and compact the array; step i back so the
			   element shifted into this slot is visited next iteration. */
			THFloatTensor_free(network->net->modules[i].output);
			memmove(network->net->modules+i, network->net->modules+i+1, sizeof(*network->net->modules) * (network->net->nelem - i - 1));
			network->net->nelem--;
			i--;
		} else if(network->net->modules[i].type == MT_Linear)
		{
			/* Rewrite Linear as a convolution with kernel = current spatial
			   size, so its output is 1x1 per original fully-connected unit. */
			THFloatTensor_free(network->net->modules[i].Linear.addBuffer);
			network->net->modules[i].updateOutput = nn_SpatialConvolutionMM_updateOutput;
#ifndef USEBLAS
			network->net->modules[i].type = MT_SpatialConvolutionVirtMM;
#else
			network->net->modules[i].type = MT_SpatialConvolutionMM;
#endif
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			c->finput = THFloatTensor_new();
			c->padW = c->padH = 0;
			c->dW = c->dH = 1;
			c->kW = c->kH = size;
			c->nInputPlane = nInputPlane;
			/* Linear weight rows become the convolution's output planes. */
			nInputPlane = c->nOutputPlane = c->weight->size[0];
			size = (size + 2*c->padW - c->kW) / c->dW + 1;
		} else if(network->net->modules[i].type == MT_SpatialConvolution ||
			network->net->modules[i].type == MT_SpatialConvolutionMM ||
			network->net->modules[i].type == MT_SpatialConvolutionVirtMM)
		{
			/* Standard convolution output-size formula. */
			struct SpatialConvolution *c = &network->net->modules[i].SpatialConvolution;
			size = (size + 2*c->padW - c->kW) / c->dW + 1;
			nInputPlane = network->net->modules[i].SpatialConvolution.nOutputPlane;
		} else if(network->net->modules[i].type == MT_SpatialMaxPooling)
		{
			struct SpatialMaxPooling *c = &network->net->modules[i].SpatialMaxPooling;
			/* ceil_mode mirrors Torch's SpatialMaxPooling rounding choice. */
			if(c->ceil_mode)
				size = (long)(ceil((float)(size - c->kH + 2*c->padH) / c->dH)) + 1;
			else
				size = (long)(floor((float)(size - c->kH + 2*c->padH) / c->dH)) + 1;
		} else if(network->net->modules[i].type == MT_SpatialZeroPadding)
		{
			/* NOTE(review): only left/right padding is added to the tracked
			   size; a square-size assumption again — confirm. */
			struct SpatialZeroPadding *c = &network->net->modules[i].SpatialZeroPadding;
			size += c->pad_l + c->pad_r;
		}
	}
}
/* Creates a 3-d tensor viewing an existing storage (no data copy).
 * A stride of -1 in any dimension selects the default contiguous stride
 * for that dimension. The storage's reference count is incremented; the
 * returned tensor shares ownership of it. */
THFloatTensor *THFloatTensor_newWithStorage3d(THFloatStorage *storage, long storageOffset, long size0, long stride0,
	long size1, long stride1, long size2, long stride2)
{
	THFloatTensor *tensor = THFloatTensor_new();
	tensor->storage = storage;
	tensor->storageOffset = storageOffset;
	tensor->nDimension = 3;
	tensor->size[0] = size0;
	tensor->size[1] = size1;
	tensor->size[2] = size2;
	/* Contiguous defaults: outermost stride is the product of the inner sizes. */
	if(stride0 == -1)
		tensor->stride[0] = size1 * size2;
	else
		tensor->stride[0] = stride0;
	if(stride1 == -1)
		tensor->stride[1] = size2;
	else
		tensor->stride[1] = stride1;
	if(stride2 == -1)
		tensor->stride[2] = 1;
	else
		tensor->stride[2] = stride2;
	THAtomicIncrement(&tensor->storage->nref);
	return tensor;
}
/* Runs the network forward: feeds `in` through every module in order,
 * freeing each module's previous output as soon as the next module has
 * consumed it to keep peak memory low.
 *
 * Returns the output tensor of the last module (owned by that module —
 * do not free it directly).
 *
 * When th_profile is set, prints per-module timing and, for convolution
 * modules, an estimated Gflops/s figure.
 */
THFloatTensor *forward(struct network *net, THFloatTensor *in)
{
	int i;
	double t = 0, convtot = 0, convflops = 0;

#ifdef OPENCL
	/* Lazily builds/compiles the OpenCL kernels on first use. */
	if(net->opencl == 1)
		OpenCL_Build(net, in);
#endif
	for(i = 0; i < net->nelem; i++)
	{
		if(th_profile)
			t = th_seconds();
		in = net->modules[i].updateOutput(&net->modules[i], in);
		// Free the previous module's intermediate output now that this module
		// has consumed it; remove these lines if memory is not a concern.
		if(i > 0)
		{
			THFloatTensor_free(net->modules[i-1].output);
			net->modules[i-1].output = THFloatTensor_new();
		}
		if(th_profile)
		{
#ifdef OPENCL
			/* Drain the queue so the timing covers the actual kernel work. */
			if(net->opencl)
				clFinish(cl_queue);
#endif
			t = th_seconds() - t;
			if(net->modules[i].type == MT_SpatialConvolutionMM ||
				net->modules[i].type == MT_SpatialConvolutionVirtMM ||
				net->modules[i].type == MT_SpatialConvolution)
			{
				/* 2 * outputs * inputPlanes * kW * kH = multiply-adds counted
				   as two flops each. */
				double flops = 2.0 * THFloatTensor_nElement(in) * net->modules[i].SpatialConvolution.nInputPlane *
					net->modules[i].SpatialConvolution.kW * net->modules[i].SpatialConvolution.kH;
				printf("%f seconds for module %d, %f Gflops/s\n", t, i+1, flops * 1e-9 / t);
				convtot += t;
				convflops += flops;
			} else printf("%f seconds for module %d\n", t, i+1);
		}
		if(th_debug > 1)
			/* NOTE(review): prints size[0..3] regardless of nDimension; the
			   trailing entries are stale for tensors with fewer dimensions. */
			printf("%d) %d %d %ld %ld %ld %ld\n", i+1, net->modules[i].type, in->nDimension, in->size[0], in->size[1], in->size[2], in->size[3]);
	}
	if(th_profile)
		printf("%f seconds for convolutions %f Gflops/s\n", convtot, convflops * 1e-9 / convtot);
	return in;
}
/* Runs the network on a batch of 8-bit RGB (or BGR) images.
 *
 * network   - loaded network
 * images    - array of batchsize pointers to interleaved 8-bit pixel rows
 * batchsize - number of images
 * width,
 * height    - per-image dimensions in pixels
 * stride    - bytes per source row
 * results   - out: pointer into the output tensor's storage (not caller-owned)
 * outwidth,
 * outheight - out: spatial size of the output (1x1 for non-spatial outputs)
 * bgr       - nonzero if the source pixel order is BGR instead of RGB
 *
 * Returns the total number of elements in the output tensor.
 *
 * NOTE(review): the OpenCL path loads only images[0] — batchsize > 1 appears
 * unsupported on that backend; confirm before relying on it.
 */
int THProcessImages(THNETWORK *network, unsigned char **images, int batchsize, int width, int height, int stride, float **results, int *outwidth, int *outheight, int bgr)
{
	int i;
	THFloatTensor *out, *t = 0;
	THFloatStorage *st;

#ifdef CUDNN
	if(network->net->cuda)
	{
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
		{
			/* Convert + normalize each image straight into GPU half storage. */
			st = THCudaStorage_new(batchsize * (width * height * 3));
			for(i = 0; i < batchsize; i++)
				cuda_rgb2half((unsigned short *)st->data + i * (width * height * 3), images[i], width, height, stride, network->mean, network->std, bgr);
		} else
#endif
		{
			st = THCudaStorage_new(batchsize * width * height * 3);
			for(i = 0; i < batchsize; i++)
				cuda_rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std, bgr);
		}
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
		t = OpenCL_LoadImage(images[0], width, height, stride, network->mean, network->std, bgr);
	else
#endif
	{
		/* CPU path: normalized planar float conversion, one image per thread. */
		st = THFloatStorage_new(batchsize * width * height * 3);
		if(bgr)
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				bgr2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
		else
#pragma omp parallel for if(batchsize>1) private(i)
			for(i = 0; i < batchsize; i++)
				rgb2float(st->data + i * width * height * 3, images[i], width, height, stride, network->mean, network->std);
	}
	if(!t)
	{
		/* Wrap the storage: 3-d for a single image, 4-d for a batch. */
		t = THFloatTensor_new();
		t->storage = st;
		if(batchsize == 1)
		{
			t->nDimension = 3;
			t->size[0] = 3;
			t->size[1] = height;
			t->size[2] = width;
			t->stride[0] = width * height;
			t->stride[1] = width;
			t->stride[2] = 1;
		} else {
			t->nDimension = 4;
			t->size[0] = batchsize;
			t->size[1] = 3;
			t->size[2] = height;
			t->size[3] = width;
			t->stride[0] = 3 * width * height;
			t->stride[1] = width * height;
			t->stride[2] = width;
			t->stride[3] = 1;
		}
	}
#ifdef CUDNN
	if(network->net->cuda)
	{
		/* Run on the GPU, then cache a host copy of the result in
		   network->out (freed on the next call). */
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(floattype == CUDNN_DATA_HALF)
			network->out = THFloatTensor_newFromHalfCudaTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromCudaTensor(out);
		out = network->out;
	} else
#endif
#ifdef OPENCL
	if(network->net->opencl)
	{
		out = forward(network->net, t);
		if(network->out)
			THFloatTensor_free(network->out);
#ifdef HAVEFP16
		if(cl_datasize == 2)
			network->out = THFloatTensor_newFromHalfOpenCLImageTensor(out);
		else
#endif
			network->out = THFloatTensor_newFromOpenCLImageTensor(out);
		out = network->out;
	} else
#endif
	out = forward(network->net, t);	/* plain CPU path */
	THFloatTensor_free(t);
	*results = out->storage->data;
	if(out->nDimension >= 3)
	{
		/* Last two dimensions are the spatial ones. */
		*outwidth = out->size[out->nDimension - 1];
		*outheight = out->size[out->nDimension - 2];
	} else
		*outwidth = *outheight = 1;
	return THFloatTensor_nElement(out);
}
/* Creates a new tensor aliasing `tensor`: THFloatTensor_set makes the new
 * tensor share the same storage and geometry (no data copy). */
THFloatTensor *THFloatTensor_newWithTensor(THFloatTensor *tensor)
{
	THFloatTensor *view = THFloatTensor_new();
	THFloatTensor_set(view, tensor);
	return view;
}