/* Lua binding: pushes the `n` field of the SVQP2 solver passed as argument 1. */
static int QPSolver_n(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  lua_pushnumber(L, solver->n);
  return 1; /* one value returned to Lua */
}
/*
 * SpatialGraph forward pass.
 *
 * Lua arguments: (1) the module table, (2) the input tensor, indexed here as
 * (channel, row, column).  Reads the module fields "connex", "dist",
 * "normalize" and "output".
 *
 * For every pixel, writes into output plane 0 a dissimilarity with the pixel
 * to its right (column+1) and into output plane 1 a dissimilarity with the
 * pixel below (row+1).  Pixels on the last column / last row have no such
 * neighbour and keep the value 0 for that plane.
 *
 *   dist == 0 : sqrt of the sum over channels of squared differences,
 *               scaled by 1/sqrt(#channels) when normalize == 1.
 *   otherwise : based on the sum over channels of products of the
 *               (epsilon-shifted) inputs: 1 - sum/(|a|*|b|) when normalize
 *               is non-zero, #channels - sum otherwise.
 *
 * NOTE(review): `output` is zeroed but never resized here, and the input rank
 * is not checked; both are presumably guaranteed by the Lua side - confirm.
 */
static int nn_(SpatialGraph_updateOutput)(lua_State *L)
{
  /* arguments and module fields */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* geometry: output has the same spatial extent as the input */
  int ichannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int oheight = iheight;
  int owidth = iwidth;
  int ochannels = connex / 2;
  int last_row = oheight - 1;
  int last_col = owidth - 1;
  int ch, row, col;

  /* scale factor applied to the Euclidean result */
  double normer = 1;
  if (norm == 1) {
    normer = 1/sqrt(ichannels);
  }

  /* accumulators below assume a zeroed output */
  THTensor_(zero)(output);

  if (dist == 0) {
    /* ---- Euclidean distance: accumulate squared differences ---- */
    for (ch = 0; ch < ichannels; ch++) {
      for (row = 0; row < oheight; row++) {
        for (col = 0; col < owidth; col++) {
          if (col < last_col) {
            /* right neighbour -> plane 0 */
            double sq = square(THTensor_(get3d)(input, ch, row, col) - THTensor_(get3d)(input, ch, row, col+1));
            THTensor_(set3d)(output, 0, row, col, sq + THTensor_(get3d)(output, 0, row, col));
          }
          if (row < last_row) {
            /* bottom neighbour -> plane 1 */
            double sq = square(THTensor_(get3d)(input, ch, row, col) - THTensor_(get3d)(input, ch, row+1, col));
            THTensor_(set3d)(output, 1, row, col, sq + THTensor_(get3d)(output, 1, row, col));
          }
        }
      }
    }

    /* ---- square root of the accumulated sums, then scaling ---- */
    for (ch = 0; ch < ochannels; ch++) {
      for (row = 0; row < oheight; row++) {
        for (col = 0; col < owidth; col++) {
          THTensor_(set3d)(output, ch, row, col, sqrt(THTensor_(get3d)(output, ch, row, col)) * normer);
        }
      }
    }
  } else {
    /* ---- Cosine dissimilarity ---- */

    /* work on a shifted copy of the input so that no entry is exactly 0 */
    THTensor *shifted = THTensor_(newWithSize3d)(input->size[0], input->size[1], input->size[2]);
    THTensor_(copy)(shifted, input);
    THTensor_(add)(shifted, shifted, 1e-12);

    for (row = 0; row < oheight; row++) {
      for (col = 0; col < owidth; col++) {
        /* squared norms of this pixel, its right and its bottom neighbour */
        double here_sq = 0;
        double right_sq = 0;
        double below_sq = 0;

        for (ch = 0; ch < ichannels; ch++) {
          here_sq += square(THTensor_(get3d)(shifted, ch, row, col));
          if (col < last_col) {
            double prod = THTensor_(get3d)(shifted, ch, row, col) * THTensor_(get3d)(shifted, ch, row, col+1);
            THTensor_(set3d)(output, 0, row, col, prod + THTensor_(get3d)(output, 0, row, col));
            right_sq += square(THTensor_(get3d)(shifted, ch, row, col+1));
          }
          if (row < last_row) {
            double prod = THTensor_(get3d)(shifted, ch, row, col) * THTensor_(get3d)(shifted, ch, row+1, col);
            THTensor_(set3d)(output, 1, row, col, prod + THTensor_(get3d)(output, 1, row, col));
            below_sq += square(THTensor_(get3d)(shifted, ch, row+1, col));
          }
        }

        /* turn the accumulated dot products into dissimilarities */
        if (col < last_col) {
          if (norm) {
            THTensor_(set3d)(output, 0, row, col, 1 - THTensor_(get3d)(output, 0, row, col) / (sqrt(here_sq) * sqrt(right_sq)));
          } else {
            THTensor_(set3d)(output, 0, row, col, ichannels - THTensor_(get3d)(output, 0, row, col));
          }
        }
        if (row < last_row) {
          if (norm) {
            THTensor_(set3d)(output, 1, row, col, 1 - THTensor_(get3d)(output, 1, row, col) / (sqrt(here_sq) * sqrt(below_sq)));
          } else {
            THTensor_(set3d)(output, 1, row, col, ichannels - THTensor_(get3d)(output, 1, row, col));
          }
        }
      }
    }

    /* release the temporary copy */
    THTensor_(free)(shifted);
  }

  return 1;
}
/*
 * SpatialAdaptiveMaxPooling forward pass.
 *
 * Lua arguments: (1) the module table, (2) a 3D input or a 4D (batch mode)
 * input.  Reads the module fields "H", "W" (output height / width),
 * "indices" and "output".
 *
 * `output` is resized to [nbatch x] nslices x H x W and `indices` to
 * 2 x [nbatch x] nslices x H x W; the per-frame helper receives the second
 * half of `indices` as its first index buffer and the first half as its
 * second one.  The input is addressed through its own strides.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  int dimh = 1;
  int dimw = 2;
  long nbatch = 1;
  long istride_b; /* only assigned and used in batch mode */
  long nslices, iheight, iwidth;
  long istride_d, istride_h, istride_w;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  /* in batch mode every spatial dimension is shifted by one */
  if (input->nDimension == 4) {
    istride_b = input->stride[0];
    nbatch = input->size[0];
    dimh += 1;
    dimw += 1;
  }

  /* input sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* input strides */
  istride_d = input->stride[dimh-1];
  istride_h = input->stride[dimh];
  istride_w = input->stride[dimw];

  if (input->nDimension != 3) {
    /* ---- batch mode: one frame per batch element ---- */
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    /* data pointers are taken only after the resizes above */
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
        input_data+p*istride_b,
        output_data+p*nslices*owidth*oheight,
        indices_data+(p+nbatch)*nslices*owidth*oheight,
        indices_data+p*nslices*owidth*oheight,
        nslices,
        iwidth, iheight,
        owidth, oheight,
        istride_w,istride_h,
        istride_d);
    }
  } else {
    /* ---- single frame ---- */
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
      input_data,
      output_data,
      indices_data+nslices*owidth*oheight,
      indices_data,
      nslices,
      iwidth, iheight,
      owidth, oheight,
      istride_w,istride_h,
      istride_d);
  }

  return 1;
}
/* Lua binding: releases the torch.PipeFile passed as argument 1 via THFile_free. */
static int torch_PipeFile_free(lua_State *L)
{
  THFile *pipe = luaT_checkudata(L, 1, "torch.PipeFile");

  THFile_free(pipe);
  return 0; /* nothing returned to Lua */
}
/*
 * SpatialConvolution backward pass.
 *
 * Lua arguments: (1) the module table, (2) input, (3) gradOutput.
 * Reads the module fields kW, kH, dW, dH, nInputPlane, nOutputPlane, weight,
 * gradWeight, gradBias and gradInput.
 *
 * For every output plane the sum of its gradOutput slice is added to
 * gradBias; for every (output plane, input plane) pair the matching
 * gradWeight slice and the unfolded gradInput slice are accumulated.
 * gradInput is resized like the input and zeroed first; gradWeight and
 * gradBias are accumulated into without being reset here.
 */
static int nn_SpatialConvolution_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  /* scratch tensors used as views on slices of the tensors above */
  THTensor *gradInputPlane = THTensor_new();
  THTensor *unfoldedInputPlane = THTensor_new();
  THTensor *unfoldedGradInputPlane = THTensor_new();
  THTensor *inputPlane = THTensor_new();
  THTensor *gradOutputPlane = THTensor_new();
  THTensor *weightPlane = THTensor_new();
  THTensor *gradWeightPlane = THTensor_new();

  int inIdx, outIdx;

  /* Not necessary with partial backprop: */
  THTensor_resizeAs(gradInput, input);
  THTensor_zero(gradInput);

  for (outIdx = 0; outIdx < nOutputPlane; outIdx++)
  {
    /* bias gradient: sum of this output plane's gradient */
    THTensor_select(gradOutputPlane, gradOutput, 2, outIdx);
    THTensor_set1d(gradBias, outIdx, THTensor_get1d(gradBias, outIdx) + THTensor_sum(gradOutputPlane));

    for (inIdx = 0; inIdx < nInputPlane; inIdx++)
    {
      /* ---------------- gradWeight ---------------- */

      /* unfold the input plane along both spatial dimensions */
      THTensor_select(inputPlane, input, 2, inIdx);
      THTensor_unfold(unfoldedInputPlane, inputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL, 1, kH, dH);
      THTensor_transpose(unfoldedInputPlane,NULL,0,2);
      THTensor_transpose(unfoldedInputPlane,NULL,1,3);

      /* gradWeight slice for (outIdx out, inIdx in) */
      THTensor_select(gradWeightPlane, gradWeight, 3, outIdx);
      THTensor_select(gradWeightPlane, NULL, 2, inIdx);

      THTensor_addT4dotT2(gradWeightPlane, 1, unfoldedInputPlane, gradOutputPlane);

      /* ---------------- gradInput ----------------- */
      /* Not necessary with partial backprop: */

      /* unfold the gradInput plane the same way */
      THTensor_select(gradInputPlane, gradInput, 2, inIdx);
      THTensor_unfold(unfoldedGradInputPlane, gradInputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedGradInputPlane, NULL , 1, kH, dH);

      /* weight slice for (outIdx out, inIdx in) */
      THTensor_select(weightPlane, weight, 3, outIdx);
      THTensor_select(weightPlane, NULL, 2, inIdx);

      THTensor_addT2outT2(unfoldedGradInputPlane, 1, gradOutputPlane, weightPlane);
    }
  }

  /* release the scratch tensors */
  THTensor_free(gradWeightPlane);
  THTensor_free(weightPlane);
  THTensor_free(gradOutputPlane);
  THTensor_free(inputPlane);
  THTensor_free(unfoldedGradInputPlane);
  THTensor_free(unfoldedInputPlane);
  THTensor_free(gradInputPlane);

  return 1;
}
/* Lua binding: pushes the tensor's storage offset plus one (the C field is shifted by +1 before being handed to Lua). */
static int torch_Tensor_(storageOffset)(lua_State *L)
{
  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);

  lua_pushnumber(L, self->storageOffset+1);
  return 1;
}
/*
 * Decode a JPEG image into a new tensor of size
 * output_components x output_height x output_width.
 *
 * Lua arguments:
 *   1: integer flag; 1 means "argument 2 is a file name", anything else
 *      means "argument 2 is a torch.ByteTensor holding the encoded bytes".
 *   2: file name (string) or torch.ByteTensor.
 *
 * Returns 1 (the tensor pushed as userdata) on success.  Raises a Lua error
 * if the file cannot be opened.  If libjpeg reports an error through the
 * custom error_exit handler, every resource acquired so far is released and
 * 0 is returned (no Lua value).
 *
 * NOTE(review): the in-memory path passes the tensor's data pointer and
 * size[0] straight to jpeg_mem_src, so it presumably expects a contiguous
 * 1D ByteTensor - confirm against callers.
 */
static int libjpeg_(Main_load)(lua_State *L)
{
  /* JPEG decompression parameters and pointers to working space
   * (which is allocated as needed by the JPEG library). */
  struct jpeg_decompress_struct cinfo;
  /* Private extension of the JPEG error handler.  It must live as long as
   * cinfo, to avoid dangling-pointer problems. */
  struct my_error_mgr jerr;

  FILE *infile = NULL;           /* source file (if loading from file) */
  unsigned char *inmem = NULL;   /* source memory (if loading from memory) */
  unsigned long inmem_size = 0;  /* source memory size (bytes) */
  JSAMPARRAY buffer;             /* one-row output buffer */
  int i, k;

  /* `tensor` is assigned after setjmp() and read again in the error path
   * reached through longjmp(); it must be volatile, otherwise its value is
   * indeterminate there (C11 7.13.2.1). */
  THTensor * volatile tensor = NULL;
  THTensor *image;               /* plain alias used on the normal path */

  const int load_from_file = luaL_checkint(L, 1);

  if (load_from_file == 1) {
    const char *filename = luaL_checkstring(L, 2);

    /* Open the input before anything else, so that the setjmp() error
     * recovery below can rely on `infile`.  "b" is required on platforms
     * that distinguish binary from text files. */
    if ((infile = fopen(filename, "rb")) == NULL) {
      luaL_error(L, "cannot open file <%s> for reading", filename);
    }
  } else {
    /* We're loading from a ByteTensor */
    THByteTensor *src = luaT_checkudata(L, 2, "torch.ByteTensor");
    inmem = THByteTensor_data(src);
    inmem_size = src->size[0];
    infile = NULL;
  }

  /* Step 1: allocate and initialize JPEG decompression object */

  /* Set up the normal JPEG error routines, then override error_exit. */
  cinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = libjpeg_(Main_error);

  /* Establish the setjmp return context for the error handler to use. */
  if (setjmp(jerr.setjmp_buffer)) {
    /* The JPEG code has signaled an error: release the JPEG object, the
     * input file and the partially filled tensor (previously leaked). */
    jpeg_destroy_decompress(&cinfo);
    if (infile) {
      fclose(infile);
    }
    if (tensor) {
      THTensor_(free)(tensor);
    }
    return 0;
  }

  /* Now we can initialize the JPEG decompression object. */
  jpeg_create_decompress(&cinfo);

  /* Step 2: specify data source (file or memory) */
  if (load_from_file == 1) {
    jpeg_stdio_src(&cinfo, infile);
  } else {
    jpeg_mem_src(&cinfo, inmem, inmem_size);
  }

  /* Step 3: read file parameters.  The return value can be ignored:
   * suspension is not possible with these data sources, and TRUE makes a
   * tables-only JPEG file an error. */
  (void) jpeg_read_header(&cinfo, TRUE);

  /* Step 4: set parameters for decompression - defaults are kept. */

  /* Step 5: start decompressor; afterwards the output dimensions are known. */
  (void) jpeg_start_decompress(&cinfo);

  /* Allocate the destination tensor and a one-row-high sample array that is
   * owned by the JPEG memory manager (JPOOL_IMAGE). */
  image = THTensor_(newWithSize3d)(cinfo.output_components, cinfo.output_height, cinfo.output_width);
  tensor = image;
  buffer = (*cinfo.mem->alloc_sarray)
    ((j_common_ptr) &cinfo, JPOOL_IMAGE, cinfo.output_width * cinfo.output_components, 1);

  /* Step 6: read scanlines one at a time.  cinfo.output_scanline is the
   * library's own counter; after a read it is one past the row just read. */
  while (cinfo.output_scanline < cinfo.output_height) {
    (void) jpeg_read_scanlines(&cinfo, buffer, 1);

    /* de-interleave the row: samples are stored pixel by pixel, the tensor
     * is stored component plane by component plane */
    for(k = 0; k < cinfo.output_components; k++) {
      for(i = 0; i < cinfo.output_width; i++)
        THTensor_(set3d)(image, k, cinfo.output_scanline-1, i, (real)buffer[0][cinfo.output_components*i+k]);
    }
  }

  /* Step 7: finish decompression */
  (void) jpeg_finish_decompress(&cinfo);

  /* Step 8: release the JPEG decompression object (frees a good deal of
   * memory, including `buffer`). */
  jpeg_destroy_decompress(&cinfo);

  /* The input file is closed only once no more JPEG errors are possible,
   * which keeps the setjmp error logic above simple. */
  if (infile) {
    fclose(infile);
  }

  /* Corrupt-data warnings (jerr.pub.num_warnings) are not inspected. */

  luaT_pushudata(L, image, torch_Tensor);
  return 1;
}
/*
 * SpatialFullConvolution forward pass.
 *
 * Lua arguments: (1) the module table, (2) a 3D input or a 4D (batch mode)
 * input whose plane dimension must equal nInputPlane.
 * Reads the module fields dW, dH, kW, kH, nInputPlane, nOutputPlane, padW,
 * padH, adjW, adjH, weight, bias, finput (column buffer), fgradInput
 * (buffer of ones) and output.
 *
 * Output spatial size per dimension: (in - 1) * stride - 2*pad + kernel + adj.
 * For each batch element: a GEMM of the input with the weights fills the
 * column buffer, col2im scatters it into the output frame, and a second
 * GEMM with the ones buffer adds the bias.
 *
 * A 3D input is temporarily resized in place to a batch of one and restored
 * (together with the output) before returning.
 */
static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
{
  /* input tensor */
  THTensor *input = (THTensor*)luaT_checkudata(L, 2, torch_Tensor);

  /* module parameters */
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = (THTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *output = (THTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* 1 when the caller passed a real batch, 0 when we faked a batch of one */
  int batch = 1;
  if (input->nDimension != 3) {
    luaL_argcheck(L, input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
  } else {
    luaL_argcheck(L, input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
    /* force batch mode */
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
  }

  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;

  /* number of frames to process */
  long batchSize = input->size[0];

  /* size the output and the temporary column buffer */
  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  /* Buffer of ones used for the bias accumulation.  It can be shared with
   * other modules: it only ever grows and always contains ones. */
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  /* per-frame views */
  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  int elt;
  for (elt = 0; elt < batchSize; elt ++) {
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    /* M,N,K are dims of matrix A and B
     * (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) */
    long m = weight->size[1] * weight->size[2] * weight->size[3];
    long n = columns->size[1];
    long k = weight->size[0];

    /* columns = input_n x weight (gemm assumes column-major matrices) */
    THBlas_(gemm)(
      'n', 't',
      n, m, k,
      1,
      THTensor_(data)(input_n), n,
      THTensor_(data)(weight), m,
      0,
      THTensor_(data)(columns), n
    );

    /* scatter the columns into the output frame */
    nn_(col2im)(
      THTensor_(data)(columns),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(output_n)
    );

    /* bias: output_n += ones x bias, again expressed as a GEMM */
    long m_ = nOutputPlane;
    long n_ = outputHeight * outputWidth;
    long k_ = 1;

    THBlas_(gemm)(
      't', 'n',
      n_, m_, k_,
      1,
      THTensor_(data)(ones), k_,
      THTensor_(data)(bias), k_,
      1,
      THTensor_(data)(output_n), n_
    );
  }

  /* drop the views */
  THTensor_(free)(input_n);
  THTensor_(free)(output_n);

  /* undo the fake batch dimension */
  if (batch == 0) {
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  /* return output */
  return 1;
}
/*
 * HorizontalConvolution: accumulate parameter gradients.
 *
 * Lua arguments: (1) the module table, (2) input, (3) gradOutput,
 * (4) optional scale (default 1).  Input and gradOutput are 3D, or 4D in
 * batch mode.  Reads the module fields nInputPlane, nOutputPlane, kL, ones,
 * gradWeight and gradBias.
 *
 * For every batch element:
 *   gradWeight[i][k] += scale * sum over rows j of
 *                       dot(gradOutput[i][j][:], input[i][j][k : k+outputWidth])
 *   gradBias         += scale * gradOutput2d * ones
 * where gradOutput2d views the element's gradient as
 * nOutputPlane x (outputHeight*outputWidth).
 *
 * Returns nothing to Lua.  3D tensors are temporarily resized in place to a
 * batch of one and restored before returning.
 *
 * NOTE(review): plane index i is used for both input and gradOutput, and the
 * 2D view uses default strides, so this presumably expects
 * nInputPlane == nOutputPlane and contiguous tensors - confirm.
 */
static int nnconv1d_(HorizontalConvolution_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");

  THTensor *ones = luaT_getfieldcheckudata(L, 1, "ones", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane" );

  // change to batch mode
  int batch = 1;
  if (input->nDimension == 3) {
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long batchSize = input->size[0];
  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = inputHeight;
  long outputWidth = inputWidth - kL + 1;

  // The ones vector is multiplied with gradOutput2d through addmv, so its
  // length has to match the number of columns exactly: resize on any
  // mismatch, not only when it is too small.
  if (ones->nDimension != 1 || ones->size[0] != outputHeight*outputWidth) {
    THTensor_(resize1d)(ones, outputHeight*outputWidth);
    THTensor_(fill)(ones, 1);
  }

  int elt;
  for (elt = 0; elt < batchSize; elt++) {
    // views on the current batch element
    THTensor *input_t = THTensor_(newSelect)(input, 0, elt);
    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);

    // 2D view (planes x pixels) of the CURRENT element's gradient.  It must
    // start at gradOutput_t's offset; starting at gradOutput's offset would
    // add the first batch element to gradBias on every iteration.
    THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput_t->storage, gradOutput_t->storageOffset,
                                                         nOutputPlane, -1,
                                                         outputWidth*outputHeight, -1);

    // dot products: one per (plane, kernel tap, row)
    int i, j, k;
    for (i = 0; i < nInputPlane; i++) {
      for (k = 0; k < kL; k++) {
        for (j = 0; j < outputHeight; j++) {
          *(gradWeight->storage->data + gradWeight->storageOffset + i*gradWeight->stride[0] + k) +=
            scale*THBlas_(dot)
              (outputWidth,
               gradOutput_t->storage->data + gradOutput_t->storageOffset
                 + i*gradOutput_t->stride[0] + j*gradOutput_t->stride[1],
               gradOutput_t->stride[2],
               input_t->storage->data + input_t->storageOffset
                 + i*input_t->stride[0] + j*input_t->stride[1] + k,
               input_t->stride[2]);
        }
      }
    }

    // fill biases
    THTensor_(addmv)(gradBias, 1, gradBias, scale, gradOutput2d, ones);

    THTensor_(free)(gradOutput2d);
    THTensor_(free)(input_t);
    THTensor_(free)(gradOutput_t);
  }

  // revert to single batch
  if (batch == 0) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
  }

  return 0;
}
/*
 * SpatialFullConvolution: gradient with respect to the input.
 *
 * Lua arguments: (1) the module table, (2) input, (3) gradOutput; 3D, or 4D
 * in batch mode.  Reads the module fields dW, dH, kW, kH, nInputPlane,
 * nOutputPlane, padW, padH, adjW, adjH, weight, finput (column buffer) and
 * gradInput.
 *
 * For each batch element, im2col unpacks the gradOutput frame into the
 * column buffer and a GEMM with the weights writes the gradInput frame.
 *
 * 3D tensors are temporarily resized in place to a batch of one and restored
 * (together with gradInput) before returning.
 */
static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L)
{
  /* tensors passed from Lua */
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  /* module parameters */
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor *)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradColumns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradInput = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* 1 when the caller passed a real batch, 0 when we faked a batch of one */
  int batch = 1;
  if (input->nDimension == 3) {
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputHeight = input->size[2];
  long inputWidth = input->size[3];
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;

  /* number of frames to process */
  long batchSize = input->size[0];

  /* size the result and the temporary column buffer */
  THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);
  THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  /* per-frame views */
  THTensor *gradInput_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  for (elt = 0; elt < batchSize; elt ++) {
    THTensor_(select)(gradInput_n, gradInput, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    /* unpack the gradOutput frame into columns */
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(gradColumns)
    );

    /* M,N,K are dims of matrix A and B
     * (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm) */
    long m = weight->size[0];
    long n = gradColumns->size[1];
    long k = weight->size[1] * weight->size[2] * weight->size[3];

    /* gradInput_n = gradColumns x weight (gemm assumes column-major matrices) */
    THBlas_(gemm)(
      'n', 'n',
      n, m, k,
      1,
      THTensor_(data)(gradColumns), n,
      THTensor_(data)(weight), k,
      0,
      THTensor_(data)(gradInput_n), n
    );
  }

  /* drop the views */
  THTensor_(free)(gradInput_n);
  THTensor_(free)(gradOutput_n);

  /* undo the fake batch dimension */
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
  }

  /* return gradInput */
  return 1;
}
/*
 * SpatialFullConvolution: accumulate parameter gradients.
 *
 * Lua arguments: (1) the module table, (2) input, (3) gradOutput,
 * (4) optional scale (default 1).  Input and gradOutput are 3D, or 4D in
 * batch mode.  Reads the module fields dW, dH, kW, kH, nInputPlane,
 * nOutputPlane, padW, padH, adjW, adjH, gradWeight, gradBias, finput
 * (column buffer) and fgradInput (buffer of ones).
 *
 * For each batch element, im2col unpacks the gradOutput frame into the
 * column buffer; a GEMM with the input frame is accumulated into gradWeight
 * and a GEMV with the ones buffer is accumulated into gradBias, both
 * multiplied by `scale`.
 *
 * Returns nothing to Lua.  3D tensors are temporarily resized in place to a
 * batch of one and restored before returning.
 */
static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L)
{
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");
  // Kept in the tensor's element type: a plain `float` would truncate the
  // Lua number to single precision even when the tensors hold doubles.
  real scale = luaL_optnumber(L, 4, 1);

  THTensor *gradWeight = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  // 1 when the caller passed a real batch, 0 when we faked a batch of one
  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth = input->size[3];
  long inputHeight = input->size[2];
  long outputWidth = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Number of frames to process
  long batchSize = input->size[0];

  // Buffer of ones for the bias accumulation; it is only ever grown because
  // it is consumed through a raw pointer with an explicit length below.
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Per-frame views
  THTensor *input_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(columns)
    );

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long n = columns->size[0];   // nOutputPlane * kh * kw
    long m = input_n->size[0];   // nInputPlane
    long k = columns->size[1];   // inputHeight * inputWidth

    // gradWeight += scale * (columns x input_n), gemm assumes column-major matrices
    THBlas_(gemm)(
      't', 'n',
      n, m, k,
      scale,
      THTensor_(data)(columns), k,
      THTensor_(data)(input_n), k,
      1,
      THTensor_(data)(gradWeight), n
    );

    // Bias: gradBias += scale * (gradOutput_n x ones)
    long m_ = nOutputPlane;
    long k_ = outputHeight * outputWidth;

    // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
    THBlas_(gemv)(
      't',
      k_, m_,
      scale,
      THTensor_(data)(gradOutput_n), k_,
      THTensor_(data)(ones), 1,
      1,
      THTensor_(data)(gradBias), 1
    );
  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(gradOutput_n);

  // Undo the fake batch dimension
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // Return nothing
  return 0;
}
/*
 * LcEncoder forward pass.
 *
 * Lua arguments: (1) the module table, (2) a 3D input whose third dimension
 * has size 1.  Reads the module fields winX, winY, woutX, woutY, xStep,
 * yStep, weight, bias and output.
 *
 * Output element (x, y, 0) is the dot product of the winX x winY input
 * window starting at (floor(x*xStep), floor(y*yStep)) with the weight slice
 * selected by (x % woutX, y % woutY), plus bias(x % woutX, y % woutY).
 */
static int nn_LcEncoder_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");

  /* scratch tensors used as views */
  THTensor *inputPlane = THTensor_new();
  THTensor *windowX = THTensor_new();
  THTensor *windowYX = THTensor_new();
  THTensor *kernelX = THTensor_new();
  THTensor *kernelYX = THTensor_new();

  /* get output size from input */
  THTensor_resize3d(output,
                    (input->size[0] - winX+1) / xStep,
                    (input->size[1] - winY+1) / yStep,
                    1);

  /* drop the singleton third dimension */
  THTensor_select(inputPlane, input, 2, 0);

  int outY, outX, inY, inX, kerY, kerX;
  for (outY = 0; outY < output->size[1]; outY++)
  {
    inY = (int)floor(outY*yStep);
    kerY = outY%woutY;

    for (outX = 0; outX < output->size[0]; outX++)
    {
      inX = (int)floor(outX*xStep);
      kerX = outX%woutX;

      /* input window */
      THTensor_narrow(windowX, inputPlane, 0, inX, winX);
      THTensor_narrow(windowYX, windowX, 1, inY, winY);

      /* matching weight slice */
      THTensor_select(kernelX, weight, 3, kerY);
      THTensor_select(kernelYX, kernelX, 2, kerX);

      double dot = THTensor_dot(windowYX, kernelYX);
      double biasSelect = THTensor_get2d(bias,kerX,kerY);
      THTensor_set3d(output,outX,outY,0,dot+biasSelect);
    }
  }

  /* release the scratch tensors */
  THTensor_free(kernelYX);
  THTensor_free(kernelX);
  THTensor_free(windowYX);
  THTensor_free(windowX);
  THTensor_free(inputPlane);

  return 1;
}
/*
 * LcEncoder backward pass.
 *
 * Lua arguments: (1) the module table, (2) input, (3) gradOutput; both 3D
 * with a third dimension of size 1.  Reads the module fields winX, winY,
 * woutX, woutY, xStep, yStep, weight, gradWeight, bias, gradBias and
 * gradInput.
 *
 * gradWeight is reset to 0 and gradInput is resized like the input and reset
 * to 0; gradBias is accumulated into without being reset here.  For every
 * gradOutput element g at (x, y), with window origin
 * (floor(x*xStep), floor(y*yStep)) and weight slice (x % woutX, y % woutY):
 *   gradWeight slice += g * input window
 *   gradBias entry   += g
 *   gradInput window += g * weight slice
 */
static int nn_LcEncoder_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  luaL_argcheck(L, input->nDimension == 3, 2, "input 3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");
  luaL_argcheck(L, gradOutput->nDimension == 3, 3, "gradOutput 3D tensor expected");
  luaL_argcheck(L, gradOutput->size[2] == 1, 3, "invalid gradOutput 3rd dim size has to be 1");

  /* `bias` is fetched (and type-checked) but not otherwise used below */
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  /* ---- gradWeight set-up: reset and create views ---- */
  THTensor_fill(gradWeight, 0);

  THTensor *inputPlane = THTensor_new();
  THTensor *inputNarrowedX = THTensor_new();
  THTensor *inputNarrowedYX = THTensor_new();
  THTensor_select(inputPlane, input, 2, 0);

  THTensor *gradWeightSelectedX = THTensor_new();
  THTensor *gradWeightSelectedYX = THTensor_new();

  /* ---- gradInput set-up: resize, reset and create views ---- */
  THTensor_resizeAs(gradInput, input);
  THTensor_fill(gradInput, 0);

  THTensor *gradInputPlane = THTensor_new();
  THTensor *gradInputNarrowedX = THTensor_new();
  THTensor *gradInputNarrowedYX = THTensor_new();
  THTensor_select(gradInputPlane, gradInput, 2, 0);

  THTensor *weightSelectedX = THTensor_new();
  THTensor *weightSelectedYX = THTensor_new();

  int outY, outX, inY, inX, kerY, kerX;
  for (outY = 0; outY < gradOutput->size[1]; outY++)
  {
    inY = (int)floor(outY*yStep);
    kerY = outY%woutY;

    for (outX = 0; outX < gradOutput->size[0]; outX++)
    {
      inX = (int)floor(outX*xStep);
      kerX = outX%woutX;

      double gradOutVal = THTensor_get3d(gradOutput,outX,outY,0);

      /* gradWeight slice += gradOutVal * input window */
      THTensor_narrow(inputNarrowedX, inputPlane, 0, inX, winX);
      THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, inY, winY);
      THTensor_select(gradWeightSelectedX, gradWeight, 3, kerY);
      THTensor_select(gradWeightSelectedYX, gradWeightSelectedX, 2, kerX);
      THTensor_addTensor(gradWeightSelectedYX, gradOutVal, inputNarrowedYX);

      /* gradBias entry += gradOutVal */
      THTensor_set2d(gradBias,kerX,kerY, THTensor_get2d(gradBias,kerX,kerY) + gradOutVal);

      /* gradInput window += gradOutVal * weight slice */
      THTensor_narrow(gradInputNarrowedX, gradInputPlane, 0, inX, winX);
      THTensor_narrow(gradInputNarrowedYX, gradInputNarrowedX, 1, inY, winY);
      THTensor_select(weightSelectedX, weight, 3, kerY);
      THTensor_select(weightSelectedYX, weightSelectedX, 2, kerX);
      THTensor_addTensor(gradInputNarrowedYX, gradOutVal, weightSelectedYX);
    }
  }

  /* release the gradWeight views */
  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(gradWeightSelectedX);
  THTensor_free(gradWeightSelectedYX);

  /* release the gradInput views */
  THTensor_free(gradInputPlane);
  THTensor_free(gradInputNarrowedX);
  THTensor_free(gradInputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);

  return 1;
}
/*
 * Lua binding: reads the boolean at stack index 2 and stores it in the
 * `sumflag` member of the SVQP2 solver found at stack index 1.
 * Pushes nothing (returns 0).
 */
static int QPSolver_sumflag(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  solver->sumflag = luaT_checkboolean(L, 2);

  return 0;
}
/*
 * Forward pass of volumetric average pooling.
 *
 * Lua stack: 1 = module table (fields kT/kW/kH, dT/dW/dH, output), 2 = input.
 * The input must be 4D, or 5D in batch mode (leading batch axis), and at
 * least as large as the kernel along time, height and width.
 *
 * The output is resized to (itime-kT)/dT+1, (iheight-kH)/dH+1,
 * (iwidth-kW)/dW+1 per slice, and the per-frame kernel is run once
 * (non-batch) or once per batch element (OpenMP-parallel).
 *
 * Returns 1.
 */
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* In batch mode every axis index moves one position to the right. */
  const int axisShift = (input->nDimension == 5) ? 1 : 0;
  const int dimN = 0 + axisShift;
  const int dimt = 1 + axisShift;
  const int dimh = 2 + axisShift;
  const int dimw = 3 + axisShift;

  luaL_argcheck(L, input->size[dimw] >= kW &&
                   input->size[dimh] >= kH &&
                   input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  /* input and output extents */
  const long nslices = input->size[dimN];
  const long itime   = input->size[dimt];
  const long iheight = input->size[dimh];
  const long iwidth  = input->size[dimw];
  const long otime   = (itime   - kT) / dT + 1;
  const long oheight = (iheight - kH) / dH + 1;
  const long owidth  = (iwidth  - kW) / dW + 1;

  /* work on a contiguous copy/view of the input; released at the end */
  input = THTensor_(newContiguous)(input);
  real *input_data = THTensor_(data)(input);
  real *output_data;

  if (input->nDimension == 5) {
    /* batch mode: one frame per leading index */
    const long nBatch = input->size[0];
    const long istride = nslices * itime * iwidth * iheight;
    const long ostride = nslices * otime * owidth * oheight;
    long p;

    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  } else {
    /* non-batch mode: a single frame */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    output_data = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(
      input_data, output_data,
      nslices, itime, iwidth, iheight, otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  }

  /* drop the contiguous input */
  THTensor_(free)(input);

  return 1;
}
/*
 * Forward pass of the horizontal (row-wise) 1D convolution.
 *
 * Lua stack: 1 = module table (nInputPlane, nOutputPlane, kL, weight, bias,
 * output), 2 = input (3D, or 4D in batch mode).
 *
 * A 3D input is temporarily resized in place to a batch of one and restored
 * (together with the output) before returning.  The output is resized to
 * batch x nOutputPlane x inputHeight x (inputWidth - kL + 1).  Each output
 * plane is filled with its bias, then for every plane, row and kernel tap a
 * THVector_(add) call accumulates the input row shifted by the tap, using
 * weight element [plane*kL + tap] as the scalar.
 *
 * NOTE(review): the accumulation loop runs over nInputPlane but indexes the
 * output planes with the same index, so it appears to assume
 * nInputPlane == nOutputPlane — confirm.  Rows are addressed with raw
 * storage offsets plus the tap index, which presumably assumes a unit
 * stride along the width.
 *
 * Returns 1.
 */
static int nnconv1d_(HorizontalConvolution_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2,
                "3D or 4D (batch mode) tensor expected");

  /* promote a single sample to a batch of one */
  const int singleSample = (input->nDimension == 3);
  if (singleSample) {
    THTensor_(resize4d)(input, 1, nInputPlane, input->size[1], input->size[2]);
  }

  const long batchSize    = input->size[0];
  const long inputHeight  = input->size[2];
  const long inputWidth   = input->size[3];
  const long outputHeight = inputHeight;
  const long outputWidth  = inputWidth - kL + 1;

  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

  int elt;
#pragma omp parallel for private(elt)
  for (elt = 0; elt < batchSize; elt++) {
    /* views on the current batch element */
    THTensor *input_t = THTensor_(newSelect)(input, 0, elt);
    THTensor *output_t = THTensor_(newSelect)(output, 0, elt);
    int plane, row, tap;

    /* start every output plane at its bias */
    for (plane = 0; plane < nOutputPlane; plane++) {
      THVector_(fill)(output_t->storage->data + output_t->storageOffset + output_t->stride[0]*plane,
                      THTensor_(get1d)(bias, plane),
                      outputHeight*outputWidth);
    }

    /* accumulate the kernel taps row by row */
    for (plane = 0; plane < nInputPlane; plane++) {
      for (row = 0; row < inputHeight; row++) {
        for (tap = 0; tap < kL; tap++) {
          THVector_(add)(output_t->storage->data + output_t->storageOffset
                           + output_t->stride[0]*plane + output_t->stride[1]*row,
                         input_t->storage->data + input_t->storageOffset
                           + input_t->stride[0]*plane + input_t->stride[1]*row + tap,
                         *(THTensor_(data)(weight) + plane*kL + tap),
                         outputWidth);
        }
      }
    }

    /* drop the per-element views */
    THTensor_(free)(input_t);
    THTensor_(free)(output_t);
  }

  /* undo the batch-of-one promotion */
  if (singleSample) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
  }

  return 1;
}
/*
 * Lua binding: pushes the nDimension field of the tensor at stack index 1
 * as a Lua number and returns that single value.
 */
static int torch_Tensor_(nDimension)(lua_State *L)
{
  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);

  lua_pushnumber(L, self->nDimension);

  return 1;
}
/*
 * Backward pass (gradient w.r.t. the input) of the horizontal 1D convolution.
 *
 * Lua stack: 1 = module table (nInputPlane, nOutputPlane, kL, weight,
 * gradInput), 2 = input, 3 = gradOutput.
 *
 * gradOutput's feature dimension must equal nOutputPlane.  3D tensors are
 * temporarily resized in place to a batch of one and restored before
 * returning.  gradInput is resized like input and zeroed; then for every
 * plane, row and kernel tap a THVector_(add) call accumulates the gradOutput
 * row into the gradInput row shifted by the tap, using weight element
 * [plane*kL + tap] as the scalar.
 *
 * NOTE(review): the loop runs over nOutputPlane but indexes gradInput planes
 * with the same index, so it appears to assume nInputPlane == nOutputPlane —
 * confirm.  The original author also flagged the accumulation call with
 * "needs to change".
 *
 * Returns 1.
 */
static int nnconv1d_(HorizontalConvolution_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int kL = luaT_getfieldcheckint(L, 1, "kL");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
             "Number of output features is not equal to nOutputPlane" );

  /* promote single samples to a batch of one */
  const int singleSample = (input->nDimension == 3);
  if (singleSample) {
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
  }

  const long batchSize    = input->size[0];
  const long inputHeight  = input->size[2];
  const long inputWidth   = input->size[3];
  const long outputHeight = inputHeight;
  const long outputWidth  = inputWidth - kL + 1;

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  int elt;
#pragma omp parallel for private(elt)
  for (elt = 0; elt < batchSize; elt++) {
    /* views on the current batch element */
    THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, elt);
    THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);
    int plane, row, tap;

    /* accumulate the kernel taps row by row */
    for (plane = 0; plane < nOutputPlane; plane++) {
      for (row = 0; row < outputHeight; row++) {
        for (tap = 0; tap < kL; tap++) {
          /* original author's note: needs to change */
          THVector_(add)(gradInput_t->storage->data + gradInput_t->storageOffset
                           + gradInput_t->stride[0]*plane + gradInput_t->stride[1]*row + tap,
                         gradOutput_t->storage->data + gradOutput_t->storageOffset
                           + gradOutput_t->stride[0]*plane + gradOutput_t->stride[1]*row,
                         *(THTensor_(data)(weight) + plane*kL + tap),
                         outputWidth);
        }
      }
    }

    /* drop the per-element views */
    THTensor_(free)(gradInput_t);
    THTensor_(free)(gradOutput_t);
  }

  /* undo the batch-of-one promotion */
  if (singleSample) {
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
  }

  return 1;
}
/*
 * save function
 *
 * JPEG-compresses a tensor and writes the result to a file or to a
 * torch.ByteTensor.
 *
 * Lua arguments (stack index):
 *   1  filename      string; opened with "wb" when save_to_file == 1
 *   2  tensor        2D (height x width, grayscale) or 3D
 *                    (channels x height x width) with 1 or 3 channels
 *   3  save_to_file  int; 1 = write to `filename`, 0 = write into argument 5
 *   4  quality       int in [0, 100], passed to jpeg_set_quality
 *   5  destination   torch.ByteTensor, only checked when save_to_file == 0;
 *                    it is resized to the compressed size and filled
 *
 * Each tensor element is converted to an unsigned char by plain assignment
 * and interleaved per pixel before compression.
 *
 * All arguments are validated before anything is allocated, and every
 * later failure path releases what it owns before raising the Lua error,
 * so no buffer or tensor reference is leaked when luaL_error unwinds.
 *
 * Returns 1, as the original did.  NOTE(review): for a lua_CFunction this is
 * the number of results, i.e. the top stack slot is handed back to Lua; it
 * is not a status code — confirm callers ignore it.
 */
int libjpeg_(Main_save)(lua_State *L)
{
  unsigned char *inmem = NULL;   /* destination memory (if saving to memory) */
  unsigned long inmem_size = 0;  /* destination memory size (bytes) */

  /* get and validate args first: nothing is allocated yet, so a Lua error
     raised here cannot leak */
  const char *filename = luaL_checkstring(L, 1);
  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);
  const int save_to_file = luaL_checkint(L, 3);

  THByteTensor *tensor_dest = NULL;
  if (save_to_file == 0) {
    tensor_dest = luaT_checkudata(L, 5, "torch.ByteTensor");
  }

  int quality = luaL_checkint(L, 4);
  if (quality < 0 || quality > 100) {
    return luaL_error(L, "quality should be between 0 and 100");
  }

  /* dimensions of the image we want to write (the contiguous copy made
     below has the same sizes as `tensor`) */
  int width = 0, height = 0, bytes_per_pixel = 0;
  int color_space = 0;
  if (tensor->nDimension == 3) {
    bytes_per_pixel = tensor->size[0];
    height = tensor->size[1];
    width = tensor->size[2];
    if (bytes_per_pixel == 3) {
      color_space = JCS_RGB;
    } else if (bytes_per_pixel == 1) {
      color_space = JCS_GRAYSCALE;
    } else {
      return luaL_error(L, "tensor should have 1 or 3 channels (gray or RGB)");
    }
  } else if (tensor->nDimension == 2) {
    bytes_per_pixel = 1;
    height = tensor->size[0];
    width = tensor->size[1];
    color_space = JCS_GRAYSCALE;
  } else {
    /* message corrected: the code accepts 2D and 3D tensors */
    return luaL_error(L, "supports only 2 or 3 dimension tensors");
  }

  /* from here on we own resources that must be released on every path */
  THTensor *tensorc = THTensor_(newContiguous)(tensor);
  real *tensor_data = THTensor_(data)(tensorc);

  /* alloc raw image data (size computed in size_t to avoid int overflow) */
  const size_t row_bytes = (size_t)width * (size_t)bytes_per_pixel;
  unsigned char *raw_image = (unsigned char *)malloc(row_bytes * (size_t)height);
  if (raw_image == NULL) {
    THTensor_(free)(tensorc);
    return luaL_error(L, "could not allocate raw image buffer");
  }

  /* convert tensor (planar: channel, row, column) to interleaved raw bytes */
  int x, y, k;
  for (k = 0; k < bytes_per_pixel; k++) {
    for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
        raw_image[((size_t)y * width + x) * bytes_per_pixel + k] = *tensor_data++;
      }
    }
  }

  FILE *outfile = NULL;
  if (save_to_file == 1) {
    outfile = fopen( filename, "wb" );
    if ( !outfile ) {
      free(raw_image);
      THTensor_(free)(tensorc);
      return luaL_error(L, "Error opening output jpeg file %s\n!", filename );
    }
  }

  /* jpeg struct */
  struct jpeg_compress_struct cinfo;
  struct jpeg_error_mgr jerr;
  /* this is a pointer to one row of image data */
  JSAMPROW row_pointer[1];

  cinfo.err = jpeg_std_error( &jerr );
  jpeg_create_compress(&cinfo);

  /* specify data destination (a file, or a libjpeg-managed memory buffer) */
  if (save_to_file == 1) {
    jpeg_stdio_dest(&cinfo, outfile);
  } else {
    jpeg_mem_dest(&cinfo, &inmem, &inmem_size);
  }

  /* Setting the parameters of the output file here */
  cinfo.image_width = width;
  cinfo.image_height = height;
  cinfo.input_components = bytes_per_pixel;
  cinfo.in_color_space = color_space;

  /* default compression parameters, then the requested quality */
  jpeg_set_defaults( &cinfo );
  jpeg_set_quality(&cinfo, quality, (boolean)0);

  /* Now do the compression, one scanline at a time */
  jpeg_start_compress( &cinfo, TRUE );
  while( cinfo.next_scanline < cinfo.image_height ) {
    row_pointer[0] = &raw_image[(size_t)cinfo.next_scanline * row_bytes];
    jpeg_write_scanlines( &cinfo, row_pointer, 1 );
  }

  /* clean up after we're done compressing */
  jpeg_finish_compress( &cinfo );
  jpeg_destroy_compress( &cinfo );

  if (outfile != NULL) {
    fclose( outfile );
  }

  if (save_to_file == 0) {
    THByteTensor_resize1d(tensor_dest, inmem_size); /* will fail if it's not a Byte Tensor */
    unsigned char* tensor_dest_data = THByteTensor_data(tensor_dest);
    memcpy(tensor_dest_data, inmem, inmem_size);
  }
  /* released unconditionally: the memory destination is also used when
     save_to_file is neither 0 nor 1 (free(NULL) is a no-op) */
  free(inmem);

  /* some cleanup */
  free(raw_image);
  THTensor_(free)(tensorc);

  /* success code is 1! */
  return 1;
}
/*
 * Returns the Lua argument at stack index `arg` as a THTensor pointer, after
 * luaT_checkudata has checked it against torch_Tensor.
 */
inline THTensor *libopencv_(checkTensor)(lua_State* L, int arg)
{
  THTensor *tensor = (THTensor*)luaT_checkudata(L, arg, torch_Tensor);

  return tensor;
}
/*
 * save function
 *
 * JPEG-compresses a tensor with libjpeg's default parameters and writes it
 * to a file.
 *
 * Lua arguments (stack index):
 *   1  filename  string; opened with "wb"
 *   2  tensor    2D (height x width, grayscale) or 3D
 *                (channels x height x width) with 1 or 3 channels
 *
 * Each tensor element is converted to an unsigned char by plain assignment
 * and interleaved per pixel before compression.
 *
 * Failure to open the file now raises a Lua error (the previous code printed
 * a message and returned -1, which is not a valid lua_CFunction result
 * count).  The tensor is validated before anything is allocated, and later
 * failure paths release what they own before raising.
 *
 * Returns 1, as the original did.  NOTE(review): for a lua_CFunction this is
 * the number of results, i.e. the top stack slot is handed back to Lua; it
 * is not a status code — confirm callers ignore it.
 */
int libjpeg_(Main_save)(lua_State *L)
{
  /* get args */
  const char *filename = luaL_checkstring(L, 1);
  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);

  /* dimensions of the image we want to write; validated on `tensor` before
     any allocation (the contiguous copy below has the same sizes) */
  int width = 0, height = 0, bytes_per_pixel = 0;
  int color_space = 0;
  if (tensor->nDimension == 3) {
    bytes_per_pixel = tensor->size[0];
    height = tensor->size[1];
    width = tensor->size[2];
    if (bytes_per_pixel == 3) {
      color_space = JCS_RGB;
    } else if (bytes_per_pixel == 1) {
      color_space = JCS_GRAYSCALE;
    } else {
      return luaL_error(L, "tensor should have 1 or 3 channels (gray or RGB)");
    }
  } else if (tensor->nDimension == 2) {
    bytes_per_pixel = 1;
    height = tensor->size[0];
    width = tensor->size[1];
    color_space = JCS_GRAYSCALE;
  } else {
    /* message corrected: the code accepts 2D and 3D tensors */
    return luaL_error(L, "supports only 2 or 3 dimension tensors");
  }

  /* from here on we own resources that must be released on every path */
  THTensor *tensorc = THTensor_(newContiguous)(tensor);
  real *tensor_data = THTensor_(data)(tensorc);

  /* alloc raw image data (size computed in size_t to avoid int overflow) */
  const size_t row_bytes = (size_t)width * (size_t)bytes_per_pixel;
  unsigned char *raw_image = (unsigned char *)malloc(row_bytes * (size_t)height);
  if (raw_image == NULL) {
    THTensor_(free)(tensorc);
    return luaL_error(L, "could not allocate raw image buffer");
  }

  /* convert tensor (planar: channel, row, column) to interleaved raw bytes */
  int x, y, k;
  for (k = 0; k < bytes_per_pixel; k++) {
    for (y = 0; y < height; y++) {
      for (x = 0; x < width; x++) {
        raw_image[((size_t)y * width + x) * bytes_per_pixel + k] = *tensor_data++;
      }
    }
  }

  FILE *outfile = fopen( filename, "wb" );
  if ( !outfile ) {
    free(raw_image);
    THTensor_(free)(tensorc);
    return luaL_error(L, "Error opening output jpeg file %s\n!", filename );
  }

  /* jpeg struct */
  struct jpeg_compress_struct cinfo;
  struct jpeg_error_mgr jerr;
  /* this is a pointer to one row of image data */
  JSAMPROW row_pointer[1];

  cinfo.err = jpeg_std_error( &jerr );
  jpeg_create_compress(&cinfo);
  jpeg_stdio_dest(&cinfo, outfile);

  /* Setting the parameters of the output file here */
  cinfo.image_width = width;
  cinfo.image_height = height;
  cinfo.input_components = bytes_per_pixel;
  cinfo.in_color_space = color_space;

  /* default compression parameters, we shouldn't be worried about these */
  jpeg_set_defaults( &cinfo );

  /* Now do the compression, one scanline at a time */
  jpeg_start_compress( &cinfo, TRUE );
  while( cinfo.next_scanline < cinfo.image_height ) {
    row_pointer[0] = &raw_image[(size_t)cinfo.next_scanline * row_bytes];
    jpeg_write_scanlines( &cinfo, row_pointer, 1 );
  }

  /* clean up after we're done compressing */
  jpeg_finish_compress( &cinfo );
  jpeg_destroy_compress( &cinfo );
  fclose( outfile );

  /* some cleanup */
  free(raw_image);
  THTensor_(free)(tensorc);

  /* success code is 1! */
  return 1;
}
// Specialisation for float tensors: checks the value at stack index `i`
// against the id registered for "torch.FloatTensor" and wraps the resulting
// C tensor pointer in a THTensor<float>.
template<> inline THTensor<float> FromLuaStack<THTensor<float> >(lua_State* L, int i)
{
  TH<float>::CTensor* raw =
      (TH<float>::CTensor*)luaT_checkudata(L, i, luaT_checktypename2id(L, "torch.FloatTensor"));

  return THTensor<float>(raw);
}
/*
 * Forward pass of SpatialConvolution.
 *
 * Lua stack: 1 = module table (kW, kH, dW, dH, nInputPlane, nOutputPlane,
 * weight, bias, output), 2 = input.  The input must be 3D with
 * size[2] == nInputPlane and at least kW x kH in its first two dimensions.
 *
 * The output is resized to ((size[0]-kW)/dW+1) x ((size[1]-kH)/dH+1) x
 * nOutputPlane.  Every output plane is filled with its bias; then, for each
 * input plane, the input is unfolded along both spatial dimensions with the
 * kernel size and step, and combined with the (output, input) weight slice
 * through THTensor_addT4dotT2.
 *
 * Returns 1.
 */
static int nn_SpatialConvolution_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == nInputPlane, 2, "invalid number of input planes");
  luaL_argcheck(L, input->size[0] >= kW && input->size[1] >= kH, 2,
                "input image smaller than kernel size");

  THTensor_resize3d(output,
                    (input->size[0] - kW) / dW + 1,
                    (input->size[1] - kH) / dH + 1,
                    nOutputPlane);

  /* reusable views */
  THTensor *inputPlane = THTensor_new();
  THTensor *weightPlane = THTensor_new();
  THTensor *outputPlane = THTensor_new();
  THTensor *unfoldedInputPlane = THTensor_new();

  int outIdx, inIdx;
  for (outIdx = 0; outIdx < nOutputPlane; outIdx++) {
    THTensor_select(outputPlane, output, 2, outIdx);

    /* start from the bias of this output plane */
    THTensor_fill(outputPlane, THTensor_get1d(bias, outIdx));

    for (inIdx = 0; inIdx < nInputPlane; inIdx++) {
      THTensor_select(inputPlane, input, 2, inIdx);

      /* kernel for the (outIdx, inIdx) pair; the second select passes NULL
         as source, as the original did, to narrow weightPlane further */
      THTensor_select(weightPlane, weight, 3, outIdx);
      THTensor_select(weightPlane, NULL, 2, inIdx);

      /* sliding windows over the input plane, along both dimensions */
      THTensor_unfold(unfoldedInputPlane, inputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL, 1, kH, dH);

      THTensor_addT4dotT2(outputPlane, 1, unfoldedInputPlane, weightPlane);
    }
  }

  THTensor_free(inputPlane);
  THTensor_free(weightPlane);
  THTensor_free(outputPlane);
  THTensor_free(unfoldedInputPlane);

  return 1;
}
// Specialisation for double tensors: checks the value at stack index `i`
// against the id registered for "torch.DoubleTensor" and wraps the resulting
// C tensor pointer in a THTensor<double>.
template<> inline THTensor<double> FromLuaStack<THTensor<double> >(lua_State* L, int i)
{
  TH<double>::CTensor* raw =
      (TH<double>::CTensor*)luaT_checkudata(L, i, luaT_checktypename2id(L, "torch.DoubleTensor"));

  return THTensor<double>(raw);
}
/*
 * Backward pass of SpatialGraph: propagates gradOutput (one plane for the
 * right-hand neighbour edge, one for the lower neighbour edge) back to
 * gradInput.
 *
 * Lua stack: 1 = module table (gradInput, output, dist, normalize),
 * 2 = input, 3 = gradOutput.  Tensors are indexed as (channel, y, x).
 *
 *   dist == 0  Euclidean edges: for each channel the difference between a
 *              pixel and its neighbour is divided by the stored output
 *              (when the difference is non-zero), scaled by gradOutput and
 *              by `normer` (1/ichannels when normalize == 1, else 1), then
 *              added to the pixel and subtracted from the neighbour.
 *   otherwise  Cosine edges: with normalize set, the gradient of
 *              1 - A.B / (|A| |B|) is used, where sum_A = |A|^2 etc.;
 *              without it, the gradient of a plain negative dot product.
 *
 * Fix: the normalised cosine terms used pow(x, 1/2) and pow(x, 3/2).  Those
 * exponents are integer divisions (0 and 1), so the norms were never
 * actually computed; they are now 0.5 and 1.5.
 *
 * gradInput is only zeroed here, not resized — presumably the caller sizes
 * it like input; confirm.
 *
 * Returns 1.
 */
static int nn_(SpatialGraph_updateGradInput)(lua_State *L)
{
  /* get all params */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");

  /* dims */
  int ichannels = input->size[0];
  int owidth = gradOutput->size[2];
  int oheight = gradOutput->size[1];

  /* norm ? */
  double normer = (norm == 1) ? 1/sqrt(ichannels)/sqrt(ichannels) : 1;

  /* gradients are accumulated below, so start from zero */
  THTensor_(zero)(gradInput);

  /* compute derivatives, and backpropagate output error to input */
  if (dist == 0) {
    /* Euclidean */
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            /* edge to the right-hand neighbour */
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 0, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y, x+1, -partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
          if (y < oheight-1) {
            /* edge to the lower neighbour */
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 1, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y+1, x, -partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }
  } else {
    /* Cosine */
    const double epsi = 1e-12; /* keeps the denominators away from zero */
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        /* squared norms of the pixel (A), its right (B) and lower (C)
           neighbours, and the dot products A.B and A.C */
        double sum_A = 0;
        double sum_B = 0;
        double sum_C = 0;
        double sum_AB = 0;
        double sum_AC = 0;
        if (norm) {
          for (k=0; k<ichannels; k++) {
            sum_A += square(THTensor_(get3d)(input, k, y, x));
            if (x < owidth-1) {
              sum_B += square(THTensor_(get3d)(input, k, y, x+1));
              sum_AB += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y, x+1);
            }
            if (y < oheight-1) {
              sum_C += square(THTensor_(get3d)(input, k, y+1, x));
              sum_AC += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y+1, x);
            }
          }
        }

        /* only meaningful when norm is set; initialised to keep the
           non-normalised path free of indeterminate values */
        double term1 = 0, term2 = 0, term3 = 0;
        double partial_d;

        if (x < owidth-1) {
          if (norm) {
            /* 1/(|A||B|), A.B/(|A|^3|B|), A.B/(|B|^3|A|) */
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_B, 0.5) + epsi );
            term2 = sum_AB / ( pow(sum_A, 1.5) * pow(sum_B, 0.5) + epsi );
            term3 = sum_AB / ( pow(sum_B, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            /* gradient w.r.t. the pixel itself */
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y, x+1);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x+1);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            /* gradient w.r.t. the right-hand neighbour */
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y, x+1)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x+1, partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
        }

        if (y < oheight-1) {
          if (norm) {
            /* 1/(|A||C|), A.C/(|A|^3|C|), A.C/(|C|^3|A|) */
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_C, 0.5) + epsi );
            term2 = sum_AC / ( pow(sum_A, 1.5) * pow(sum_C, 0.5) + epsi );
            term3 = sum_AC / ( pow(sum_C, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            /* gradient w.r.t. the pixel itself */
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y+1, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y+1, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            /* gradient w.r.t. the lower neighbour */
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y+1, x)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y+1, x, partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }
  }

  return 1;
}
/*
 * Forward pass of Jitter: crops a window out of every sample of a 4D input
 * and optionally mirrors it horizontally.
 *
 * Lua stack: 1 = module table (output, xstart, ystart, xcrop, ycrop,
 * randflip), 2 = input indexed as (batch, y, x, channel).
 *
 * The output is resized to (bs, size[1]-ycrop, size[2]-xcrop, channels).
 * Output pixel (y, x) is read from input row y+ystart-1 and column
 * x+xstart-1, or column xstart-1+outx-1-x when randflip == 1 (xstart and
 * ystart are therefore used as 1-based offsets).  Elements are addressed
 * through the tensors' strides.
 *
 * Fix: the inner indices y, x and ch used to be declared outside the
 * OpenMP loop while only batchidx was listed as private, so all threads
 * shared them (a data race).  They are now declared inside the parallel
 * loop body, which makes them per-thread.  Strides and offsets are also
 * held as long so the element offset is not truncated to int.
 *
 * Returns 1.
 */
static int nxn_(Jitter_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int xstart = luaT_getfieldcheckint(L, 1, "xstart");
  int ystart = luaT_getfieldcheckint(L, 1, "ystart");
  int xcrop = luaT_getfieldcheckint(L, 1, "xcrop");
  int ycrop = luaT_getfieldcheckint(L, 1, "ycrop");
  int hflip = luaT_getfieldcheckint(L, 1, "randflip");

  int bs = input->size[0];
  int outy = input->size[1] - ycrop;
  int outx = input->size[2] - xcrop;
  int channels = input->size[3];

  THTensor_(resize4d)(output, bs, outy, outx, channels);

  real* idata = THTensor_(data)(input);
  real* odata = THTensor_(data)(output);

  const long istr0 = input->stride[0];
  const long istr1 = input->stride[1];
  const long istr2 = input->stride[2];
  const long istr3 = input->stride[3];

  const long ostr0 = output->stride[0];
  const long ostr1 = output->stride[1];
  const long ostr2 = output->stride[2];
  const long ostr3 = output->stride[3];

  int batchidx;
#pragma omp parallel for private(batchidx)
  for (batchidx = 0; batchidx < bs; batchidx++) {
    /* declared inside the parallel region: private to each thread */
    int y, x, ch;

    for (y = 0; y < outy; y++) {
      const long srcy = y + ystart - 1;

      for (x = 0; x < outx; x++) {
        /* jittering + hflip reads the window right-to-left,
           jittering alone reads it left-to-right */
        const long srcx = (hflip == 1) ? (xstart - 1 + outx - 1 - x)
                                       : (x + xstart - 1);

        for (ch = 0; ch < channels; ch++) {
          odata[batchidx*ostr0 + y*ostr1 + x*ostr2 + ch*ostr3] =
            idata[batchidx*istr0 + srcy*istr1 + srcx*istr2 + ch*istr3];
        }
      }
    }
  }

  return 1;
}
/*
 * Backward pass of spatial adaptive max pooling.
 *
 * Lua stack: 1 = module table (indices, gradInput), 2 = input,
 * 3 = gradOutput.
 *
 * gradInput is resized like input and zeroed, a contiguous gradOutput is
 * obtained, and the per-frame kernel is called once for a 3D input or once
 * per batch element otherwise (OpenMP-parallel).  The indices buffer is
 * passed as two pointers: one at the start of the frame's block and one
 * offset by a further nbatch*nslices*owidth*oheight elements (nbatch is 1
 * for a 3D input).
 *
 * Returns 1.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* contiguous gradOutput; released at the end */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradients are scattered into a zeroed buffer shaped like the input */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* a 4D input carries a leading batch axis */
  const int hasBatchAxis = (input->nDimension == 4);
  const long nbatch = hasBatchAxis ? input->size[0] : 1;
  const int dimh = hasBatchAxis ? 2 : 1;
  const int dimw = hasBatchAxis ? 3 : 2;

  /* sizes */
  const int nslices = input->size[dimh-1];
  const int iheight = input->size[dimh];
  const int iwidth  = input->size[dimw];
  const int oheight = gradOutput->size[dimh];
  const int owidth  = gradOutput->size[dimw];

  /* raw pointers */
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3) {
    nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data,
      indices_data + nslices*owidth*oheight, indices_data,
      nslices, iwidth, iheight, owidth, oheight);
  } else {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++) {
      nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(
        gradInput_data + p*nslices*iwidth*iheight,
        gradOutput_data + p*nslices*owidth*oheight,
        indices_data + (p+nbatch)*nslices*owidth*oheight,
        indices_data + p*nslices*owidth*oheight,
        nslices, iwidth, iheight, owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * Backward pass of volumetric average pooling.
 *
 * Lua stack: 1 = module table (dT/dW/dH, kT/kW/kH, gradInput), 2 = input,
 * 3 = gradOutput.
 *
 * gradInput is resized like input and zeroed, a contiguous gradOutput is
 * obtained, and the per-frame kernel is called once for a 4D input or once
 * per batch element otherwise (OpenMP-parallel).  For a 5D input all axis
 * indices are shifted by one to skip the batch axis.
 *
 * Returns 1.
 */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* contiguous gradOutput; released at the end */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradients are accumulated into a zeroed buffer shaped like the input */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* a 5D input carries a leading batch axis */
  const int axisShift = (input->nDimension == 5) ? 1 : 0;
  const int dimN = 0 + axisShift;
  const int dimt = 1 + axisShift;
  const int dimh = 2 + axisShift;
  const int dimw = 3 + axisShift;

  /* sizes */
  const int nslices = input->size[dimN];
  const int itime   = input->size[dimt];
  const int iheight = input->size[dimh];
  const int iwidth  = input->size[dimw];
  const int otime   = gradOutput->size[dimt];
  const int oheight = gradOutput->size[dimh];
  const int owidth  = gradOutput->size[dimw];

  /* raw pointers */
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) {
    /* non-batch mode */
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data,
      nslices, itime, iwidth, iheight, otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else {
    /* batch mode */
    const long nBatch = input->size[0];
    const long istride = nslices * itime * iwidth * iheight;
    const long ostride = nslices * otime * owidth * oheight;
    long p;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data + p * istride, gradOutput_data + p * ostride,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);

  return 1;
}
/*
 * Accumulates the parameter gradients of SpatialConvolution.
 *
 * Lua stack: 1 = module table (dW, dH, nOutputPlane, gradWeight, gradBias),
 * 2 = input, 3 = gradOutput, 4 = optional scale (defaults to 1).
 *
 * gradOutput's feature dimension (index 1 for a 4D input, 0 otherwise) must
 * equal nOutputPlane.
 *
 *   gradBias[k]  += scale * (every element of output plane k), summed over
 *                   the batch as well for a non-3D input.  gradOutput is
 *                   walked through its raw data pointer with dense plane
 *                   offsets — presumably it is expected to be contiguous.
 *   gradWeight   is updated through THTensor_(conv2DRevger) for a 3D input
 *                   and THTensor_(conv2DRevgerm) otherwise.
 *
 * Pushes nothing (returns 0).
 */
static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

  /* a 4D input carries a leading batch axis */
  const int axisShift = (input->nDimension == 4) ? 1 : 0;
  const int dimh = 1 + axisShift;
  const int dimw = 2 + axisShift;

  /* gradient to bias */
  real *gradBias_data = THTensor_(data)(gradBias);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  const long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];

  if (input->nDimension == 3) {
    long k;
#pragma omp parallel for private(k)
    for (k = 0; k < nOutputPlane; k++) {
      const real *cursor = gradOutput_data + k*noutSlice;
      const real *const stop = cursor + noutSlice;

      for (; cursor != stop; ++cursor) {
        gradBias_data[k] += scale * (*cursor);
      }
    }

    /* gradient to kernels */
    THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  } else {
    long k;
#pragma omp parallel for private(k)
    for (k = 0; k < nOutputPlane; k++) {
      long p;
      for (p = 0; p < input->size[0]; p++) {
        /* plane k of batch element p */
        const real *cursor = gradOutput_data + p*nOutputPlane*noutSlice + k*noutSlice;
        const real *const stop = cursor + noutSlice;

        for (; cursor != stop; ++cursor) {
          gradBias_data[k] += scale * (*cursor);
        }
      }
    }

    /* gradient to kernels */
    THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  }

  return 0;
}
/*
 * Lua binding: deletes the SVQP2 solver object found at stack index 1.
 * Pushes nothing (returns 0).
 */
static int QPSolver_free(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  delete solver;

  return 0;
}