/*
 * Forward pass of the "MM" spatial convolution module.
 *
 * module: supplies kernel size (kW, kH), stride (dW, dH), padding (padW, padH),
 *         plane counts, the weight/bias tensors, the finput scratch tensor and
 *         the output tensor (module->output).
 * input:  3D or 4D tensor. A 3D input is resized in place to a batch of one for
 *         the duration of the call and resized back to 3D (using the module's
 *         nInputPlane) before returning.
 *
 * finput is resized to (batch, kW*kH*nInputPlane, outputHeight*outputWidth) and
 * output to (batch, nOutputPlane, outputHeight, outputWidth); each batch entry
 * is then handed to nn_SpatialConvolutionMM_updateOutput_frame.
 *
 * Returns module->output.
 */
THFloatTensor *nn_SpatialConvolutionMM_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;

	THFloatTensor *finput = module->SpatialConvolution.finput;
	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	/* batch == 0 remembers that the caller passed a single 3D frame. */
	int batch = 1;
	if (input->nDimension == 3) {
		batch = 0;
		THFloatTensor_resize4d(input, 1, input->size[0], input->size[1], input->size[2]);
	}

	long batchSize = input->size[0];
	long nInputPlane = module->SpatialConvolution.nInputPlane;
	long nOutputPlane = module->SpatialConvolution.nOutputPlane;
	long inputWidth = input->size[3];
	long inputHeight = input->size[2];
	long outputWidth = (inputWidth + 2*padW - kW) / dW + 1;
	long outputHeight = (inputHeight + 2*padH - kH) / dH + 1;

	/* All six arguments are long, so the conversions must be %ld, not %d.
	 * NOTE(review): assumes THError forwards to a printf-style formatter. */
	if (outputWidth < 1 || outputHeight < 1)
		THError("Given input size: (%ldx%ldx%ld). Calculated output size: (%ldx%ldx%ld). Output size is too small",
			nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, outputWidth);

	THFloatTensor_resize3d(finput, batchSize, kW*kH*nInputPlane, outputHeight*outputWidth);
	THFloatTensor_resize4d(output, batchSize, nOutputPlane, outputHeight, outputWidth);

	long t;
	/* Frames are processed independently; parallelised only for batches of 4+. */
#pragma omp parallel for if(batchSize >= 4) private(t)
	for (t = 0; t < batchSize; t++) {
		THFloatTensor *input_t = THFloatTensor_newSelect(input, 0, t);
		THFloatTensor *output_t = THFloatTensor_newSelect(output, 0, t);
		THFloatTensor *finput_t = THFloatTensor_newSelect(finput, 0, t);

		nn_SpatialConvolutionMM_updateOutput_frame(input_t, output_t, weight, bias, finput_t,
			kW, kH, dW, dH, padW, padH,
			nInputPlane, inputWidth, inputHeight,
			nOutputPlane, outputWidth, outputHeight);

		THFloatTensor_free(input_t);
		THFloatTensor_free(output_t);
		THFloatTensor_free(finput_t);
	}

	/* Undo the temporary batch dimension for single-frame callers. */
	if (batch == 0) {
		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		THFloatTensor_resize3d(input, nInputPlane, inputHeight, inputWidth);
	}

	return output;
}
// frame grabber static int l_grabFrame (lua_State *L) { // Get Tensor's Info const int idx = lua_tonumber(L, 1); THFloatTensor * tensor = luaT_checkudata(L, 2, luaT_checktypename2id(L, "torch.FloatTensor")); // grab frame frame[idx] = cvQueryFrame ( capture[idx] ); if( !frame[idx] ) { perror("could not query OpenCV capture"); } // resize given tensor THFloatTensor_resize3d(tensor, 3, frame[idx]->height, frame[idx]->width); // copy to tensor int m0 = tensor->stride[1]; int m1 = tensor->stride[2]; int m2 = tensor->stride[0]; unsigned char *src = frame[idx]->imageData; float *dst = THFloatTensor_data(tensor); int i, j, k; for (i=0; i < frame[idx]->height; i++) { for (j=0, k=0; j < frame[idx]->width; j++, k+=m1) { // red: dst[k] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 2]/255.; // green: dst[k+m2] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 1]/255.; // blue: dst[k+2*m2] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 0]/255.; } dst += m0; } return 0; }
/*
 * Forward pass of the direct spatial convolution module.
 *
 * Reads stride (dW, dH), weight and bias from module->SpatialConvolution and
 * writes into module->output, which is also the return value. The output is
 * resized, every output plane is pre-filled with its bias value, and the
 * convolution is then added on top via THFloatTensor_conv2Dmv (3D input) or
 * THFloatTensor_conv2Dmm (any other dimensionality).
 *
 * The bias fill addresses the output through a raw data pointer with dense
 * plane offsets, i.e. it assumes the resized output is contiguous.
 */
THFloatTensor *nn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	THFloatTensor *kernels = module->SpatialConvolution.weight;
	THFloatTensor *biases = module->SpatialConvolution.bias;
	THFloatTensor *result = module->output;
	int strideW = module->SpatialConvolution.dW;
	int strideH = module->SpatialConvolution.dH;

	/* Height/width live at indices 1/2, shifted by one when the input is 4D. */
	int shift = (input->nDimension == 4) ? 1 : 0;
	int hIdx = 1 + shift;
	int wIdx = 2 + shift;

	long nPlanes = kernels->size[0];
	long kernW = kernels->size[3];
	long kernH = kernels->size[2];
	long inW = input->size[wIdx];
	long inH = input->size[hIdx];
	long outW = (inW - kernW) / strideW + 1;
	long outH = (inH - kernH) / strideH + 1;
	long planeLen = outW * outH;

	float *biasVals;
	float *outVals;

	if (input->nDimension != 3) {
		/* Batched path: one bias-filled block of planes per sample. */
		long n;

		THFloatTensor_resize4d(result, input->size[0], nPlanes, outH, outW);
		biasVals = THFloatTensor_data(biases);
		outVals = THFloatTensor_data(result);

#pragma omp parallel for private(n)
		for (n = 0; n < input->size[0]; n++) {
			float *sample = outVals + n * nPlanes * planeLen;
			long c;

			for (c = 0; c < biases->size[0]; c++) {
				float *plane = sample + c * planeLen;
				long e;

				for (e = 0; e < planeLen; e++)
					plane[e] = biasVals[c];
			}
		}

		/* Accumulate the convolutions onto the bias-initialised output. */
		THFloatTensor_conv2Dmm(result, 1.0, 1.0, input, kernels, strideH, strideW, "V","X");
		return result;
	}

	/* Single-frame path. */
	{
		long c;

		THFloatTensor_resize3d(result, nPlanes, outH, outW);
		biasVals = THFloatTensor_data(biases);
		outVals = THFloatTensor_data(result);

#pragma omp parallel for private(c)
		for (c = 0; c < biases->size[0]; c++) {
			float *plane = outVals + c * planeLen;
			long e;

			for (e = 0; e < planeLen; e++)
				plane[e] = biasVals[c];
		}

		THFloatTensor_conv2Dmv(result, 1.0, 1.0, input, kernels, strideH, strideW, "V","X");
	}

	return result;
}
/*
 * 2D "valid" cross-correlation of a 3D input with a 4D kernel bank.
 *
 * r_:         result tensor; resized to (k_->size[0], outRows, outCols).
 * beta:       handling of the existing contents of r_: the result is zeroed
 *             when beta == 0, when r_ was empty, or when the resize changed
 *             its element count; left as is when beta == 1; otherwise
 *             multiplied by beta.
 * alpha:      forwarded to THFloatTensor_validXCorr2Dptr (presumably the scale
 *             applied to each accumulated correlation -- confirm there).
 * t_:         3D input (planes, rows, cols).
 * k_:         4D kernel (output planes, input planes, rows, cols).
 * srow, scol: row/column strides, must be >= 1.
 * vf, xc:     only "V" / "X" are accepted.
 *
 * Output planes and the rows within each input/kernel plane are addressed
 * through raw data pointers with dense offsets; only the input plane stride
 * and the first two kernel strides are taken from the tensors.
 */
void THFloatTensor_conv2Dmv(THFloatTensor *r_, float beta, float alpha, THFloatTensor *t_, THFloatTensor *k_, long srow, long scol, const char *vf, const char *xc)
{
	long inPlanes, inRows, inCols;
	long kerRows, kerCols;
	long outPlanes, outRows, outCols, outPlaneLen;
	long inPlaneStride, kerOutStride, kerInStride;
	float *src;
	float *wts;
	float *dst;
	long prevCount;
	int mustZero;
	long k;

	/* Argument validation. */
	if (t_->nDimension != 3)
		THError("input: 3D Tensor expected");
	if (k_->nDimension != 4)
		THError("kernel: 4D Tensor expected");
	if (srow < 1)
		THError("Stride should be a positive integer");
	if (scol < 1)
		THError("Stride should be a positive integer");
	if (!(*vf == 'V' && *xc == 'X'))
		THError("Type of convolution can be 'V','X' only");

	/* Geometry of input and kernel. */
	inPlanes = t_->size[0];
	inPlaneStride = t_->stride[0];
	inRows = t_->size[1];
	inCols = t_->size[2];

	kerOutStride = k_->stride[0];
	kerInStride = k_->stride[1];
	kerRows = k_->size[2];
	kerCols = k_->size[3];
	outPlanes = k_->size[0];

	if (k_->size[1] != inPlanes)
		THError("invalid number of input planes");
	if (inRows < kerRows || inCols < kerCols)
		THError("conv2Dmv : Input image is smaller than kernel");

	outRows = (inRows - kerRows) / srow + 1;
	outCols = (inCols - kerCols) / scol + 1;
	outPlaneLen = outRows * outCols;

	/* Element count before the resize decides whether old contents are usable. */
	prevCount = THFloatTensor_nElement(r_);
	THFloatTensor_resize3d(r_, outPlanes, outRows, outCols);

	src = THFloatTensor_data(t_);
	wts = THFloatTensor_data(k_);
	dst = THFloatTensor_data(r_);

	mustZero = (prevCount == 0 || beta == 0 || prevCount != THFloatTensor_nElement(r_));

	if (mustZero) {
		/* Start from a zeroed result. */
#pragma omp parallel for private(k)
		for (k = 0; k < r_->size[0]; k++) {
			float *plane = dst + k * outPlaneLen;
			long e;

			for (e = 0; e < outPlaneLen; e++)
				plane[e] = 0.0;
		}
	} else if (beta != 1) {
		/* Scale the existing result by beta. */
#pragma omp parallel for private(k)
		for (k = 0; k < r_->size[0]; k++) {
			float *plane = dst + k * outPlaneLen;
			long e;

			for (e = 0; e < outPlaneLen; e++)
				plane[e] *= beta;
		}
	}

	/* For every output plane, run the cross-correlation over all input planes. */
#pragma omp parallel for private(k)
	for (k = 0; k < outPlanes; k++) {
		float *plane = dst + k * outPlaneLen;
		long p;

		for (p = 0; p < inPlanes; p++) {
			float *kernelPlane = wts + k * kerOutStride + p * kerInStride;
			float *inputPlane = src + p * inPlaneStride;

			THFloatTensor_validXCorr2Dptr(plane, alpha,
				inputPlane, inRows, inCols,
				kernelPlane, kerRows, kerCols,
				srow, scol);
		}
	}
}