/* Lua binding: nn.Min forward pass.
 * Computes the minimum value (and its 1-based argmin index) of `input`
 * (stack slot 2) along `self.dimension`, writing the results into the
 * `output` and `indices` tensor fields of the module table (stack slot 1).
 * NOTE(review): this block appears truncated by the extract — the trailing
 * `if(output->nDimension > 1)` has its consequent cut off (presumably the
 * squeeze of the reduced dimension in the original file); confirm against
 * the full source before editing. */
static int nn_(Min_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; /* Lua dims are 1-based */
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THLongStorage *dim;
  long i;

  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  /* output/indices get the input's shape with the reduced dimension set to 1 */
  dim = THLongStorage_newWithSize(input->nDimension);
  for(i = 0; i < input->nDimension; i++)
    dim->data[i] = input->size[i];
  dim->data[dimension] = 1;
  THTensor_(resize)(output, dim, NULL);
  THTensor_(resize)(indices, dim, NULL);
  THLongStorage_free(dim);

  /* linear scan of every fiber along `dimension`; stores a 1-based index
     (theIndex+1) so Lua callers see Lua-style positions */
  TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
                       long theIndex = 0;
                       real theMin = input_data[0];
                       for(i = 1; i < input_size; i++)
                       {
                         if(input_data[i*input_stride] < theMin)
                         {
                           theIndex = i;
                           theMin = input_data[i*input_stride];
                         }
                       }
                       *indices_data = theIndex+1;
                       *output_data = theMin;)

  /* NOTE(review): consequent of this `if` is missing from the extract */
  if(output->nDimension > 1)
/* Max reduction along one dimension.
 * Fills `values_` with the maxima of `t` along `dimension` and `indices_`
 * with the corresponding 0-based argmax positions; both are resized to
 * t's shape with the reduced dimension set to 1.
 * NOTE(review): the function's closing brace is missing from this extract;
 * the body itself looks complete. */
void THLab_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension)
{
  THLongStorage *dim;
  long i;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension out of range");

  /* result shape = t's shape with size 1 along the reduced dimension */
  dim = THTensor_(newSizeOf)(t);
  THLongStorage_set(dim, dimension, 1);
  THTensor_(resize)(values_, dim);
  THLongTensor_resize(indices_, dim);
  THLongStorage_free(dim);

  /* scan each fiber along `dimension`, tracking the running max and its index */
  TH_TENSOR_DIM_APPLY3(real, Real, t, real, Real, values_, long, Long, indices_, dimension,
                       long theIndex = 0;
                       real theMax = t_data[0];
                       for(i = 1; i < t_size; i++)
                       {
                         if(t_data[i*t_stride] > theMax)
                         {
                           theIndex = i;
                           theMax = t_data[i*t_stride];
                         }
                       }
                       *indices__data = theIndex;
                       *values__data = theMax;);
/* helpful functions */

/* Read a size specification from the Lua stack.
 * Accepts either a torch.LongStorage at `index` (retained and returned
 * through *size_), or up to four numbers starting at `index`; trailing
 * missing arguments become size 0.  On a non-number argument the partial
 * storage is freed before luaL_error longjmps out.  Caller owns *size_. */
static void torch_(Tensor_c_readSize)(lua_State *L, int index, THLongStorage **size_)
{
  THLongStorage *result = luaT_toudata(L, index, torch_LongStorage_id);

  if(result)
  {
    /* share ownership with the caller: bump the refcount */
    THLongStorage_retain(result);
    *size_ = result;
    return;
  }

  result = THLongStorage_newWithSize(4);
  {
    long d;
    for(d = 0; d < 4; d++)
    {
      if(lua_isnone(L, index+d))
        THLongStorage_set(result, d, 0);
      else if(lua_isnumber(L, index+d))
        THLongStorage_set(result, d, lua_tonumber(L, index+d));
      else
      {
        THLongStorage_free(result); /* avoid a leak before the longjmp */
        luaL_error(L, "invalid argument %d: number expected", index+d);
      }
    }
  }
  *size_ = result;
}
/* Build a THLongStorage from the Lua arguments starting at `index`:
 * either a single torch.LongStorage (returned as a private copy) or a
 * run of numbers.  Raises a Lua error on a non-number argument, freeing
 * the partial storage first.  Caller owns the returned storage. */
THLongStorage* lab_checklongargs(lua_State *L, int index)
{
  const int nargs = lua_gettop(L) - index + 1;
  THLongStorage *result;

  if(nargs == 1 && luaT_toudata(L, index, torch_LongStorage_id))
  {
    /* single LongStorage argument: deep-copy it */
    THLongStorage *src = luaT_toudata(L, index, torch_LongStorage_id);
    result = THLongStorage_newWithSize(src->size);
    THLongStorage_copy(result, src);
  }
  else
  {
    int arg;
    result = THLongStorage_newWithSize(nargs);
    for(arg = index; arg < index + nargs; arg++)
    {
      if(!lua_isnumber(L, arg))
      {
        THLongStorage_free(result); /* luaL_argerror longjmps; free first */
        luaL_argerror(L, arg, "number expected");
      }
      result->data[arg - index] = lua_tonumber(L, arg);
    }
  }
  return result;
}
/* Gated Linear Unit forward pass.
 * Splits `input` in two halves x1, x2 along `dim` (converted from the
 * frontend's TH_INDEX_BASE convention) and computes
 * output = x1 * sigmoid(x2), resizing `output` to the half shape.
 * The halved dimension must have even size. */
void THNN_(GatedLinear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          int dim)
{
  dim = dim - TH_INDEX_BASE;

  const int64_t nIn = THTensor_(size)(input, dim);
  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld",
             dim + TH_INDEX_BASE, nIn);

  /* output takes the input's shape with `dim` halved */
  const int64_t halfSize = THTensor_(size)(input, dim) / 2;
  THLongStorage *outSize = THTensor_(newSizeOf)(input);
  THLongStorage_set(outSize, dim, halfSize);
  THTensor_(resize)(output, outSize, NULL);
  THLongStorage_free(outSize);

  /* views over the two halves of the input along `dim` */
  THTensor *linearPart = THTensor_(newNarrow)(input, dim, 0, halfSize);
  THTensor *gatePart = THTensor_(newNarrow)(input, dim, halfSize, halfSize);

  /* output = linearPart :cmul( sigmoid(gatePart) ) */
  THTensor_(sigmoid)(output, gatePart);
  THTensor_(cmul)(output, output, linearPart);

  THTensor_(free)(linearPart);
  THTensor_(free)(gatePart);
}
/* RPC handler: unpack a destination tensor, a generator and a size from
 * the message, then fill the tensor with normally-distributed samples.
 * The unpacked THLongStorage is owned here and freed before returning. */
static void tensorRandn(rpc::RPCMessage& raw_message) {
  thpp::Tensor *dest = unpackRetrieveTensor(raw_message);
  thpp::Generator *gen = unpackRetrieveGenerator(raw_message);
  THLongStorage *sizes = unpackTHLongStorage(raw_message);
  finalize(raw_message);
  dest->randn(*gen, sizes);
  THLongStorage_free(sizes);
}
/* Lua binding: tensor:resize(...).
 * Reads a size spec (LongStorage or numbers) starting at stack slot 2,
 * resizes the tensor in place, and returns the tensor itself. */
static int torch_(Tensor_resize)(lua_State *L)
{
  THTensor *self = luaT_checkudata(L, 1, torch_(Tensor_id));
  THLongStorage *newSize = NULL;

  torch_(Tensor_c_readSize)(L, 2, &newSize);
  THTensor_(resize)(self, newSize);
  THLongStorage_free(newSize);

  lua_settop(L, 1); /* leave only the tensor on the stack as the result */
  return 1;
}
/* Lua binding: torch.Tensor(...) constructor.
 * Parses the (storage, offset, size) arguments beginning at stack slot 1,
 * builds a tensor over them, and pushes it as the result. */
static int torch_(Tensor_new)(lua_State *L)
{
  THStorage *storage = NULL;
  long storageOffset = 0;
  THLongStorage *size = NULL;
  THTensor *self;

  torch_(Tensor_c_readTensorStorageSize)(L, 1, 1, 1, 1,
                                         &storage, &storageOffset, &size);
  self = THTensor_(newWithStorage)(storage, storageOffset, size);
  THLongStorage_free(size); /* newWithStorage copied what it needed */
  luaT_pushudata(L, self, torch_(Tensor_id));
  return 1;
}
/*
 * Based on the implementation of the THTensor_(indexSelect) in torch7.
 *
 * Gathers slices of `src` along dimension `dim`, selected by the 1-D long
 * tensor `index` (entries are 1-based, Lua convention), into `tensor`,
 * which is resized to src's shape with size[dim] replaced by the number
 * of indices.  For 1-D sources each element is copied device-to-device.
 *
 * Fix: the dimension check now also rejects negative `dim`, which would
 * otherwise read src->size[dim] and select out of bounds.
 */
static void THCudaTensor_indexSelect(THCudaTensor *tensor, THCudaTensor *src, int dim, THLongTensor *index)
{
  long i, numel;
  THLongStorage *newSize;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim >= 0 && dim < src->nDimension, 4, "Indexing dim is out of bounds");
  THArgCheck(src->nDimension > 0, 2, "Source tensor is empty");

  numel = THLongTensor_nElement(index);

  /* output shape = src shape with size[dim] replaced by numel */
  newSize = THLongStorage_newWithSize(src->nDimension);
  THLongStorage_rawCopy(newSize, src->size);
  newSize->data[dim] = numel;
  THCudaTensor_resize(tensor, newSize, NULL);
  THLongStorage_free(newSize);

  /* iterate over a contiguous copy of the index data on the host */
  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i = 0; i < numel; i++)
  {
    if (src->nDimension > 1)
    {
      /* copy the (dim, index[i]-1) slice of src into slot i of the output */
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, i);
      THCudaTensor_select(sSlice, src, dim, index_data[i]-1);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    {
      /* It's faster to copy a float from an address in the device to another
         address in the device than retrieving it to the host memory and
         recopying it to the device memory */
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + i,
                             src->storage->data + src->storageOffset + index_data[i]-1,
                             sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }

  THLongTensor_free(index);
}
/* Translate then rotate a set of points.
 * For each (translation, quaternion) pair, computes p' = rotate(q, p + t)
 * for every point p in `vect`, writing results into `result`, which is
 * resized to [quat batch dims..., vect batch dims..., vectStride].
 * Layout assumptions (from the stride reads below): the last dimension of
 * `trans`/`quat`/`vect` holds the components (length 3/4, 4, 3/4).
 * The rotation formula p' = p + 2*(w*c + q_xyz x c) with c = q_xyz x p
 * implies q is stored as (x, y, z, w) with q[3] the scalar part —
 * NOTE(review): confirm this convention against callers. */
void translate_rotate(THDoubleTensor *result, THDoubleTensor *trans, THDoubleTensor *quat, THDoubleTensor *vect )
{
  long outDimension = quat->nDimension + vect->nDimension -1;
  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd = THLongStorage_data(newSize);
  long offset = 0;
  /* component counts live in the last dimension of each tensor */
  long quatStride = quat->size[quat->nDimension-1];
  long transStride = trans->size[trans->nDimension-1];
  long vectStride = vect->size[vect->nDimension-1];
  long nElementQuat = THDoubleTensor_nElement(quat);
  long nElementVect = THDoubleTensor_nElement(vect);
  long nQuat = nElementQuat / quatStride;
  long nTrans = THDoubleTensor_nElement(trans) / transStride;
  long i,j;

  THArgCheck(nTrans == nQuat, 2, "Different number of translations and rotations");
  THArgCheck(((transStride == 3) || (transStride == 4)),2,
             "translation vectors should be of length 3 or 4");
  THArgCheck(quatStride == 4, 3, "quaternion is a vector of length 4");
  THArgCheck(((vectStride == 3) || (vectStride == 4)), 4,
             "point vectors should be of length 3 or 4");

  /* output shape: quat batch dims, then vect batch dims, then components */
  for (i = 0 ; i < quat->nDimension-1 ; i++){ sd[offset] = quat->size[i]; offset += 1; }
  for (i = 0 ; i < vect->nDimension-1 ; i++){ sd[offset] = vect->size[i]; offset += 1; }
  sd[offset] = vectStride;
  THDoubleTensor_resize(result, newSize, NULL);
  if (vectStride == 4) // in case homogeneous coordinates are requested: w stays 1
    THDoubleTensor_fill(result,1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q = THDoubleTensor_data(quat);
  double *t = THDoubleTensor_data(trans);
  double *v = THDoubleTensor_data(vect);
  double x1, y1, z1;

  /* outer loop over quaternion/translation pairs; inner loop over points */
  for (j = 0; j < nElementQuat; j += quatStride)
  {
#pragma omp parallel for private(i,x1,y1,z1)
    for (i = 0; i < nElementVect; i += vectStride)
    {
      /* translate first */
      res[i] = v[i] + t[0];
      res[i+1] = v[i+1] + t[1];
      res[i+2] = v[i+2] + t[2];
      /* then rotate in place: c = q_xyz x p; p' = p + 2*(w*c + q_xyz x c) */
      x1 = q[1]*res[i+2] - q[2]*res[i+1];
      y1 = q[2]*res[i] - q[0]*res[i+2];
      z1 = q[0]*res[i+1] - q[1]*res[i];
      res[i] += 2 * (q[3]*x1 + q[1]*z1 - q[2]*y1);
      res[i+1] += 2 * (q[3]*y1 + q[2]*x1 - q[0]*z1);
      res[i+2] += 2 * (q[3]*z1 + q[0]*y1 - q[1]*x1);
    }
    /* advance to the next pair and the next output slab of points */
    q += quatStride;
    t += transStride;
    res += nElementVect;
  }
}
/* Rotate a set of points by a set of quaternions.
 * Supports two memory layouts, detected from the component counts:
 *  - default "NxD": components in the last dimension (quat ...x4,
 *    vect ...x3/4); output is [quat dims..., vect dims..., vectSize];
 *  - "DxN" (DHW=1): components in the first dimension; output is
 *    [quat dims..., vectSize, vect dims...].
 * The formula p' = p + 2*(q3*c + q_xyz x c), c = q_xyz x p, implies the
 * quaternion is stored (x, y, z, w) — NOTE(review): confirm with callers. */
void rotate_by_quat(THDoubleTensor *result, THDoubleTensor *quat, THDoubleTensor *vect )
{
  long outDimension = quat->nDimension + vect->nDimension -1;
  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd = THLongStorage_data(newSize);
  long offset = 0;
  // TODO look at torch.min() or torch.max() to allow vector in any dimension.
  // which dimension contains quat or vect (default to NxD)
  char DHW = 0;                        /* 0 = NxD layout, 1 = DxN layout */
  long quatDim = quat->nDimension-1;
  long vectDim = vect->nDimension-1;
  long quatSize = quat->size[quatDim]; // == 4
  long vectSize = vect->size[vectDim]; // == 3 or 4
  long nElementQuat = THDoubleTensor_nElement(quat);
  long nElementVect = THDoubleTensor_nElement(vect);
  // step to get to next dimension (component stride)
  long quatDimStride = 1;
  long vectDimStride = 1;
  // step to get to next element (next quaternion / next point)
  long quatElemStride = quatSize;
  long vectElemStride = vectSize;
  long i,j;
  // check for DxN
  // quaternions and vectors are either Nx3,4 or 3,4 x N but must be consistent.
  if ((quatSize != 4) || ((vectSize != 3) && vectSize != 4))
  {
    /* last-dim sizes don't look like components: retry with the first dim */
    vectDim = 0; // test DxN
    quatDim = 0;
    quatSize = quat->size[vectDim];    /* both indices are 0 here */
    vectSize = vect->size[quatDim];
    quatElemStride = 1;                /* consecutive elements are adjacent */
    vectElemStride = 1;
    quatDimStride = quat->stride[vectDim]; /* components step by dim-0 stride */
    vectDimStride = vect->stride[quatDim];
    DHW = 1;
  }
  THArgCheck(quatSize == 4, 2, "quaternion is a vector of length 4");
  THArgCheck(((vectSize == 3) || (vectSize == 4)),3,
             "point vectors should be of length 3 or 4");

  long n_vect = nElementVect / vectSize;
  long n_quat = nElementQuat / quatSize;

  // get dimensions for the output (skip the component dimension of each input)
  long start = 0;
  long quat_end = quat->nDimension-1;
  long vect_end = vect->nDimension-1;
  if (DHW > 0) { start++; quat_end++; vect_end++; }
  // quaternion batch dimensions
  for (i = start ; i < quat_end ; i++){ sd[offset] = quat->size[i]; offset += 1; }
  if (DHW > 0) { // output nquat x 3,4 x nvect
    sd[offset] = vectSize; offset += 1;
  }
  // vector batch dimensions
  for (i = start ; i < vect_end ; i++){ sd[offset] = vect->size[i]; offset += 1; }
  if (DHW==0) { // output nquat x nvect x 3
    sd[offset] = vectSize; offset += 1;
  }
  // resize the output
  THDoubleTensor_resize(result, newSize, NULL);
  if (vectSize == 4) // in case homogeneous coordinates are requested: w stays 1
    THDoubleTensor_fill(result,1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q = THDoubleTensor_data(quat);
  double *v = THDoubleTensor_data(vect);
  double x1, y1, z1;
  // how to step through the result (mirrors the input layout)
  long resDimStride = result->stride[outDimension-1];
  long resElemStride = vectSize;
  long resQuatStride = 0;
  if (DHW>0)
  {
    resDimStride = result->stride[quat->nDimension-1];
    resElemStride = result->stride[outDimension-1];
    if (n_quat > 1) {
      /* jump from the end of one quaternion's block to the start of the next */
      resQuatStride = result->stride[0] - resDimStride;
    }
  }
  double * qres = res;
  /* res0/res1/res2 point at the x/y/z component of the current output point */
  double * res0 = res;
  double * res1 = res0 + resDimStride;
  double * res2 = res1 + resDimStride;
  /* q0..q3 point at the components of the current quaternion */
  double * q0 = q;
  double * q1 = q0+quatDimStride;
  double * q2 = q1+quatDimStride;
  double * q3 = q2+quatDimStride;
  for (j = 0; j < n_quat; j++)
  {
    double * v0 = v;
    double * v1 = v0+vectDimStride;
    double * v2 = v1+vectDimStride;
#pragma omp parallel for private(i,x1,y1,z1)
    for (i = 0; i < n_vect; i++)
    {
      /* c = q_xyz x v; p' = v + 2*(q3*c + q_xyz x c) */
      x1 = (*q1)*(*v2) - (*q2)*(*v1);
      y1 = (*q2)*(*v0) - (*q0)*(*v2);
      z1 = (*q0)*(*v1) - (*q1)*(*v0);
      (*res0) = (*v0) + 2 * ((*q3)*x1 + (*q1)*z1 - (*q2)*y1);
      (*res1) = (*v1) + 2 * ((*q3)*y1 + (*q2)*x1 - (*q0)*z1);
      (*res2) = (*v2) + 2 * ((*q3)*z1 + (*q0)*y1 - (*q1)*x1);
      v0+=vectElemStride; v1+=vectElemStride; v2+=vectElemStride;
      res0+=resElemStride; res1+=resElemStride; res2+=resElemStride;
    }
    q0+=quatElemStride; q1+=quatElemStride; q2+=quatElemStride; q3+=quatElemStride;
    // facilitate nquats x 3 x nvect output
    res0 = res0 + resQuatStride;
    res1 = res0 + resDimStride;
    res2 = res1 + resDimStride;
  }
}
/* Lua binding: nn.VolumetricMaxPooling forward pass.
 * Pools `input` (4D: slices x time x h x w, or 5D batch) with kernel
 * kT x kH x kW and stride dT x dH x dW.  `indices` records, for each output
 * element, the (ti, i, j) position of the max, stored as three stacked
 * planes: indices[0]=ti, indices[1]=i, indices[2]=j (leading dim of size 3,
 * per the resize calls below).  Returns 1 (the output on the Lua stack is
 * the module's `output` field, updated in place). */
static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  /* kernel and stride sizes from the module's fields */
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* dimension indices; shifted by one when a batch dimension is present */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH
                && input->size[dimt] >= kT, 2, "input image smaller than kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  /* valid-convolution style output extents (no padding) */
  otime   = (itime - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth  = (iwidth - kW) / dW + 1;

  /* get contiguous input (new reference; freed at the end) */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* the three index planes are passed as (j-plane, i-plane, ti-plane)
       offsets of 2N, N, 0 into the [3][N] indices buffer */
    nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
                                                 indices_data+nslices*otime*owidth*oheight*2,
                                                 indices_data+nslices*otime*owidth*oheight,
                                                 indices_data,
                                                 nslices,
                                                 itime, iwidth, iheight,
                                                 otime, owidth, oheight,
                                                 kT, kW, kH, dT, dW, dH);
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];
    /* per-sample element strides in input and output */
    long istride = nslices*itime*iwidth*iheight;
    long ostride = nslices*otime*owidth*oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THLongStorage* size = THLongStorage_newWithSize(6);
    size->data[0] = 3; size->data[1] = nBatch;
    size->data[2] = nslices; size->data[3] = otime;
    size->data[4] = oheight; size->data[5] = owidth;
    THTensor_(resize)(indices, size, NULL); /* resize6d not available */
    //TODO: Replace with resize6d when available
    //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* layout [3][nBatch][...]: plane k of batch p starts at (k*nBatch+p)*ostride,
       hence the p+2*nBatch / p+nBatch / p offsets below */
#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++)
    {
      nn_(VolumetricMaxPooling_updateOutput_frame)(
        input_data+p*istride,
        output_data+p*ostride,
        indices_data+(p+nBatch+nBatch)*ostride,
        indices_data+(p+nBatch)*ostride,
        indices_data+p*ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
    THLongStorage_free(size);
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}