THLongStorage* lab_checklongargs(lua_State *L, int index)
{
  THLongStorage *storage;
  int i;
  int narg = lua_gettop(L)-index+1;

  if(narg == 1 && luaT_toudata(L, index, torch_LongStorage_id))
  {
    THLongStorage *storagesrc = luaT_toudata(L, index, torch_LongStorage_id);
    storage = THLongStorage_newWithSize(storagesrc->size);
    THLongStorage_copy(storage, storagesrc);
  }
  else
  {
    storage = THLongStorage_newWithSize(narg);
    for(i = index; i < index+narg; i++)
    {
      if(!lua_isnumber(L, i))
      {
        THLongStorage_free(storage);
        luaL_argerror(L, i, "number expected");
      }
      storage->data[i-index] = lua_tonumber(L, i);
    }
  }
  return storage;
}
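/*
 * A minimal standalone sketch (not part of the library) of the calling
 * convention lab_checklongargs supports: either a single LongStorage or a
 * plain list of numbers. The hypothetical helper below mirrors only the
 * number-list branch, using stdarg in place of the Lua stack; all names
 * here are illustrative, not library API.
 */
#include <stdarg.h>
#include <stdlib.h>

static long *collect_long_args(int narg, ...)  /* hypothetical helper */
{
  long *data = malloc(narg * sizeof(long));
  va_list ap;
  int i;
  va_start(ap, narg);
  for(i = 0; i < narg; i++)
    data[i] = va_arg(ap, long);  /* mirrors storage->data[i-index] = lua_tonumber(L, i) */
  va_end(ap);
  return data;
}

/* usage: long *dims = collect_long_args(3, 4L, 5L, 6L);  => {4, 5, 6} */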
static void load_array_to_lua(lua_State *L, chtk::htkarray& arr){
  int ndims = 2;
  //based on code from mattorch with stride fix
  THLongStorage *size = THLongStorage_newWithSize(ndims);
  THLongStorage *stride = THLongStorage_newWithSize(ndims);
  THLongStorage_set(size, 0, arr.nsamples);
  THLongStorage_set(size, 1, arr.samplesize/4*(2*arr.frm_ext+1));
  THLongStorage_set(stride, 1, 1);
  THLongStorage_set(stride, 0, arr.samplesize/4*(2*arr.frm_ext+1));

  void *tensorDataPtr = NULL;
  size_t numBytes = 0;

  THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride);
  tensorDataPtr = (void *)(THFloatTensor_data(tensor));
  numBytes = THFloatTensor_nElement(tensor) * 4;
  luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor"));

  // newWithSize copies the sizes; free our storages to avoid a leak
  THLongStorage_free(size);
  THLongStorage_free(stride);

  // now copy the data
  assert(tensorDataPtr);
  memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes);
}
/* helpful functions */
static void torch_(Tensor_c_readSize)(lua_State *L, int index, THLongStorage **size_)
{
  THLongStorage *size = NULL;
  long i;

  if( (size = luaT_toudata(L, index, torch_LongStorage_id)) )
  {
    THLongStorage_retain(size);
    *size_ = size;
  }
  else
  {
    size = THLongStorage_newWithSize(4);
    for(i = 0; i < 4; i++)
    {
      if(lua_isnone(L, index+i))
        THLongStorage_set(size, i, 0);
      else
      {
        if(lua_isnumber(L, index+i))
          THLongStorage_set(size, i, lua_tonumber(L, index+i));
        else
        {
          THLongStorage_free(size);
          luaL_error(L, "invalid argument %d: number expected", index+i);
        }
      }
    }
    *size_ = size;
  }
}
static int nn_(Min_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THLongStorage *dim;
  long i;

  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  dim = THLongStorage_newWithSize(input->nDimension);
  for(i = 0; i < input->nDimension; i++)
    dim->data[i] = input->size[i];
  dim->data[dimension] = 1;
  THTensor_(resize)(output, dim, NULL);
  THTensor_(resize)(indices, dim, NULL);
  THLongStorage_free(dim);

  TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
                       long theIndex = 0;
                       real theMin = input_data[0];
                       for(i = 1; i < input_size; i++)
                       {
                         if(input_data[i*input_stride] < theMin)
                         {
                           theIndex = i;
                           theMin = input_data[i*input_stride];
                         }
                       }
                       *indices_data = theIndex+1;
                       *output_data = theMin;)

  /* reconstruction of the truncated tail (assumed): squeeze out the reduced,
     now size-1, dimension, as torch's reduction modules do */
  if(output->nDimension > 1)
  {
    THTensor_(select)(output, NULL, dimension, 0);
    THTensor_(select)(indices, NULL, dimension, 0);
  }

  return 1;
}
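/*
 * Standalone sketch of what the TH_TENSOR_DIM_APPLY3 body above computes:
 * the minimum and its 1-based index along one dimension of a row-major 2D
 * array. Purely illustrative; no TH dependencies, names are not library API.
 */
#include <stdio.h>

static void min_along_rows(const float *in, long rows, long cols,
                           float *out_min, long *out_idx)
{
  for(long r = 0; r < rows; r++)
  {
    long theIndex = 0;
    float theMin = in[r*cols];
    for(long c = 1; c < cols; c++)
      if(in[r*cols + c] < theMin) { theIndex = c; theMin = in[r*cols + c]; }
    out_min[r] = theMin;
    out_idx[r] = theIndex + 1;  /* 1-based, as the Lua-facing code stores */
  }
}

int main(void)
{
  const float x[2*3] = { 3, 1, 2,
                         9, 7, 8 };
  float m[2]; long idx[2];
  min_along_rows(x, 2, 3, m, idx);
  printf("%g@%ld %g@%ld\n", m[0], idx[0], m[1], idx[1]);  /* 1@2 7@2 */
  return 0;
}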
static void torch_(Tensor_c_readTensorStorageSize)(lua_State *L, int index,
                                                   int allowNone, int allowTensor, int allowStorage,
                                                   THStorage **storage_, long *storageOffset_, THLongStorage **size_)
{
  static char errMsg[64];
  THTensor *src = NULL;
  THStorage *storage = NULL;

  int arg1Type = lua_type(L, index);

  if( allowNone && (arg1Type == LUA_TNONE) )
  {
    *storage_ = NULL;
    *storageOffset_ = 0;
    *size_ = THLongStorage_new();
    return;
  }
  else if( allowTensor && (arg1Type == LUA_TUSERDATA) && (src = luaT_toudata(L, index, torch_(Tensor_id))) )
  {
    *storage_ = THTensor_(storage)(src);
    *storageOffset_ = THTensor_(storageOffset)(src);
    *size_ = THTensor_(newSizeOf)(src);
    return;
  }
  else if( allowStorage && (arg1Type == LUA_TUSERDATA) && (storage = luaT_toudata(L, index, torch_(Storage_id))) )
  {
    *storage_ = storage;
    if(lua_isnone(L, index+1))
    {
      *storageOffset_ = 0;
      *size_ = THLongStorage_newWithSize(1);
      THLongStorage_set(*size_, 0, THStorage_(size)(storage)); /* index 0: the storage has a single element */
    }
    else
    {
      *storageOffset_ = luaL_checklong(L, index+1)-1;
      torch_(Tensor_c_readSize)(L, index+2, size_);
    }
    return;
  }
  else if( (arg1Type == LUA_TNUMBER) || (luaT_toudata(L, index, torch_LongStorage_id)) )
  {
    *storage_ = NULL;
    *storageOffset_ = 0;
    torch_(Tensor_c_readSize)(L, index, size_);
    return;
  }

  sprintf(errMsg, "expecting number%s%s",
          (allowTensor ? " or Tensor" : ""),
          (allowStorage ? " or Storage" : ""));
  luaL_argcheck(L, 0, index, errMsg);
}
static int torch_Tensor_(size)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  if(lua_isnumber(L,2))
  {
    int dim = luaL_checkint(L, 2)-1;
    luaL_argcheck(L, dim >= 0 && dim < tensor->nDimension, 2, "out of range");
    lua_pushnumber(L, tensor->size[dim]);
  }
  else
  {
    THLongStorage *storage = THLongStorage_newWithSize(tensor->nDimension);
    memmove(storage->data, tensor->size, sizeof(long)*tensor->nDimension);
    luaT_pushudata(L, storage, "torch.LongStorage");
  }
  return 1;
}
static int torch_Tensor_(stride)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  if(lua_isnumber(L,2))
  {
    int dim = luaL_checkint(L, 2)-1;
    THArgCheck(dim >= 0 && dim < tensor->nDimension, 2, "dimension %d out of range of %dD tensor",
               dim+1, THTensor_(nDimension)(tensor));
    luaT_pushlong(L, tensor->stride[dim]);
  }
  else
  {
    THLongStorage *storage = THLongStorage_newWithSize(tensor->nDimension);
    memmove(storage->data, tensor->stride, sizeof(long)*tensor->nDimension);
    luaT_pushudata(L, storage, "torch.LongStorage");
  }
  return 1;
}
/*
 * Based on the implementation of THTensor_(indexSelect) in torch7.
 */
static void THCudaTensor_indexSelect(THCudaTensor *tensor, THCudaTensor *src, int dim, THLongTensor *index)
{
  long i, numel;
  THLongStorage *newSize;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < src->nDimension, 4, "Indexing dim is out of bounds");
  THArgCheck(src->nDimension > 0, 2, "Source tensor is empty");

  numel = THLongTensor_nElement(index);

  newSize = THLongStorage_newWithSize(src->nDimension);
  THLongStorage_rawCopy(newSize, src->size);
  newSize->data[dim] = numel;
  THCudaTensor_resize(tensor, newSize, NULL);
  THLongStorage_free(newSize);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i = 0; i < numel; i++)
  {
    if (src->nDimension > 1)
    {
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, i);
      THCudaTensor_select(sSlice, src, dim, index_data[i]-1);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    {
      /* a device-to-device copy of a single float is faster than
         round-tripping the value through host memory */
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + i,
                             src->storage->data + src->storageOffset + index_data[i]-1,
                             sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }

  THLongTensor_free(index);
}
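/*
 * Standalone sketch of indexSelect semantics on a plain row-major 2D array,
 * selecting along dim 0 with 1-based indices (as the CUDA code above expects
 * from Lua). Illustrative only; no TH/CUDA dependencies.
 */
#include <stdio.h>

static void index_select_rows(const float *src, long cols,
                              const long *index, long numel, float *dst)
{
  for(long i = 0; i < numel; i++)        /* dst row i <- src row index[i]-1 */
    for(long c = 0; c < cols; c++)
      dst[i*cols + c] = src[(index[i]-1)*cols + c];
}

int main(void)
{
  const float src[3*2] = { 1,2, 3,4, 5,6 };
  const long idx[2] = { 3, 1 };          /* pick rows 3 and 1 (1-based) */
  float dst[2*2];
  index_select_rows(src, 2, idx, 2, dst);
  printf("%g %g / %g %g\n", dst[0], dst[1], dst[2], dst[3]);  /* 5 6 / 1 2 */
  return 0;
}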
THLongStorage *THCudaTensor_newStrideOf(THCState *state, THCudaTensor *self)
{
  THLongStorage *stride = THLongStorage_newWithSize(self->nDimension);
  THLongStorage_rawCopy(stride, self->stride);
  return stride;
}
void translate_rotate(THDoubleTensor *result,
                      THDoubleTensor *trans,
                      THDoubleTensor *quat,
                      THDoubleTensor *vect)
{
  long outDimension = quat->nDimension + vect->nDimension - 1;

  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd = THLongStorage_data(newSize);
  long offset = 0;

  long quatStride  = quat->size[quat->nDimension-1];
  long transStride = trans->size[trans->nDimension-1];
  long vectStride  = vect->size[vect->nDimension-1];

  long nElementQuat = THDoubleTensor_nElement(quat);
  long nElementVect = THDoubleTensor_nElement(vect);

  long nQuat  = nElementQuat / quatStride;
  long nTrans = THDoubleTensor_nElement(trans) / transStride;

  long i,j;

  THArgCheck(nTrans == nQuat, 2, "Different number of translations and rotations");
  THArgCheck(((transStride == 3) || (transStride == 4)), 2, "translation vectors should be of length 3 or 4");
  THArgCheck(quatStride == 4, 3, "quaternion is a vector of length 4");
  THArgCheck(((vectStride == 3) || (vectStride == 4)), 4, "point vectors should be of length 3 or 4");

  for (i = 0; i < quat->nDimension-1; i++) {
    sd[offset] = quat->size[i];
    offset += 1;
  }
  for (i = 0; i < vect->nDimension-1; i++) {
    sd[offset] = vect->size[i];
    offset += 1;
  }
  sd[offset] = vectStride;

  THDoubleTensor_resize(result, newSize, NULL);
  if (vectStride == 4) // in case homogeneous coordinates are requested
    THDoubleTensor_fill(result, 1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q   = THDoubleTensor_data(quat);
  double *t   = THDoubleTensor_data(trans);
  double *v   = THDoubleTensor_data(vect);
  double x1, y1, z1;

  for (j = 0; j < nElementQuat; j += quatStride)
  {
#pragma omp parallel for private(i,x1,y1,z1)
    for (i = 0; i < nElementVect; i += vectStride)
    {
      /* translate, then rotate: c = q_xyz x p, p' = p + 2*(q_w*c + q_xyz x c) */
      res[i]   = v[i]   + t[0];
      res[i+1] = v[i+1] + t[1];
      res[i+2] = v[i+2] + t[2];
      x1 = q[1]*res[i+2] - q[2]*res[i+1];
      y1 = q[2]*res[i]   - q[0]*res[i+2];
      z1 = q[0]*res[i+1] - q[1]*res[i];
      res[i]   += 2 * (q[3]*x1 + q[1]*z1 - q[2]*y1);
      res[i+1] += 2 * (q[3]*y1 + q[2]*x1 - q[0]*z1);
      res[i+2] += 2 * (q[3]*z1 + q[0]*y1 - q[1]*x1);
    }
    q   += quatStride;
    t   += transStride;
    res += nElementVect;
  }
}
void rotate_by_quat(THDoubleTensor *result,
                    THDoubleTensor *quat,
                    THDoubleTensor *vect)
{
  long outDimension = quat->nDimension + vect->nDimension - 1;

  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd = THLongStorage_data(newSize);
  long offset = 0;

  // TODO look at torch.min() or torch.max() to allow vector in any dimension.

  // which dimension contains quat or vect (default to NxD)
  char DHW = 0;

  long quatDim = quat->nDimension-1;
  long vectDim = vect->nDimension-1;

  long quatSize = quat->size[quatDim]; // == 4
  long vectSize = vect->size[vectDim]; // == 3 or 4

  long nElementQuat = THDoubleTensor_nElement(quat);
  long nElementVect = THDoubleTensor_nElement(vect);

  // step to get to next dimension
  long quatDimStride = 1;
  long vectDimStride = 1;
  // step to get to next element
  long quatElemStride = quatSize;
  long vectElemStride = vectSize;

  long i,j;

  // check for DxN layout:
  // quaternions and vectors are either Nx3,4 or 3,4xN but must be consistent
  if ((quatSize != 4) || ((vectSize != 3) && (vectSize != 4))) {
    // test DxN
    quatDim = 0;
    vectDim = 0;
    quatSize = quat->size[quatDim];
    vectSize = vect->size[vectDim];
    quatElemStride = 1;
    vectElemStride = 1;
    quatDimStride = quat->stride[quatDim];
    vectDimStride = vect->stride[vectDim];
    DHW = 1;
  }

  THArgCheck(quatSize == 4, 2, "quaternion is a vector of length 4");
  THArgCheck(((vectSize == 3) || (vectSize == 4)), 3, "point vectors should be of length 3 or 4");

  long n_vect = nElementVect / vectSize;
  long n_quat = nElementQuat / quatSize;

  // get dimensions for the output
  long start = 0;
  long quat_end = quat->nDimension-1;
  long vect_end = vect->nDimension-1;
  if (DHW > 0) {
    start++;
    quat_end++;
    vect_end++;
  }
  // quaternion dimensions
  for (i = start; i < quat_end; i++) {
    sd[offset] = quat->size[i];
    offset += 1;
  }
  if (DHW > 0) { // output nquat x 3,4 x nvect
    sd[offset] = vectSize;
    offset += 1;
  }
  // vector dimensions
  for (i = start; i < vect_end; i++) {
    sd[offset] = vect->size[i];
    offset += 1;
  }
  if (DHW == 0) { // output nquat x nvect x 3,4
    sd[offset] = vectSize;
    offset += 1;
  }

  // resize the output
  THDoubleTensor_resize(result, newSize, NULL);
  if (vectSize == 4) // in case homogeneous coordinates are requested
    THDoubleTensor_fill(result, 1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q   = THDoubleTensor_data(quat);
  double *v   = THDoubleTensor_data(vect);
  double x1, y1, z1;

  // how to step through the result
  long resDimStride  = result->stride[outDimension-1];
  long resElemStride = vectSize;
  long resQuatStride = 0;
  if (DHW > 0) {
    resDimStride  = result->stride[quat->nDimension-1];
    resElemStride = result->stride[outDimension-1];
    if (n_quat > 1) {
      resQuatStride = result->stride[0] - resDimStride;
    }
  }

  double *res0 = res;
  double *res1 = res0 + resDimStride;
  double *res2 = res1 + resDimStride;

  double *q0 = q;
  double *q1 = q0 + quatDimStride;
  double *q2 = q1 + quatDimStride;
  double *q3 = q2 + quatDimStride;

  for (j = 0; j < n_quat; j++) {
    double *v0 = v;
    double *v1 = v0 + vectDimStride;
    double *v2 = v1 + vectDimStride;
#pragma omp parallel for private(i,x1,y1,z1)
    for (i = 0; i < n_vect; i++) {
      /* per-iteration pointers keep the OpenMP loop free of shared-pointer updates */
      const double *pv0 = v0 + i*vectElemStride;
      const double *pv1 = v1 + i*vectElemStride;
      const double *pv2 = v2 + i*vectElemStride;
      double *pr0 = res0 + i*resElemStride;
      double *pr1 = res1 + i*resElemStride;
      double *pr2 = res2 + i*resElemStride;
      /* c = q_xyz x v, then v' = v + 2*(q_w*c + q_xyz x c) */
      x1 = (*q1)*(*pv2) - (*q2)*(*pv1);
      y1 = (*q2)*(*pv0) - (*q0)*(*pv2);
      z1 = (*q0)*(*pv1) - (*q1)*(*pv0);
      *pr0 = (*pv0) + 2 * ((*q3)*x1 + (*q1)*z1 - (*q2)*y1);
      *pr1 = (*pv1) + 2 * ((*q3)*y1 + (*q2)*x1 - (*q0)*z1);
      *pr2 = (*pv2) + 2 * ((*q3)*z1 + (*q0)*y1 - (*q1)*x1);
    }
    q0 += quatElemStride; q1 += quatElemStride;
    q2 += quatElemStride; q3 += quatElemStride;
    // advance past the block just written; resQuatStride facilitates
    // nquat x 3,4 x nvect output
    res0 += n_vect*resElemStride + resQuatStride;
    res1 = res0 + resDimStride;
    res2 = res1 + resDimStride;
  }
}
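/*
 * Both translate_rotate and rotate_by_quat above apply the quaternion
 * sandwich v' = q v q* in the expanded form v' = v + 2*(w*(u x v) + u x (u x v)),
 * with u the vector part q[0..2] and w the scalar part q[3]. A standalone
 * check of that formula, rotating (1,0,0) by 90 degrees about z; illustrative
 * only, no TH dependencies.
 */
#include <stdio.h>
#include <math.h>

static void quat_rotate(const double q[4], const double v[3], double out[3])
{
  double x1 = q[1]*v[2] - q[2]*v[1];   /* c = u x v */
  double y1 = q[2]*v[0] - q[0]*v[2];
  double z1 = q[0]*v[1] - q[1]*v[0];
  out[0] = v[0] + 2*(q[3]*x1 + q[1]*z1 - q[2]*y1);  /* w*c + u x c, written out */
  out[1] = v[1] + 2*(q[3]*y1 + q[2]*x1 - q[0]*z1);
  out[2] = v[2] + 2*(q[3]*z1 + q[0]*y1 - q[1]*x1);
}

int main(void)
{
  double a = acos(-1.0)/4;             /* pi/4: half-angle of a 90 degree turn */
  double q[4] = { 0, 0, sin(a), cos(a) };
  double v[3] = { 1, 0, 0 }, out[3];
  quat_rotate(q, v, out);
  printf("%.3f %.3f %.3f\n", out[0], out[1], out[2]);  /* ~ 0.000 1.000 0.000 */
  return 0;
}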
THLongStorage *THZTensor_(newStrideOf)(THZTensor *self)
{
  THLongStorage *stride = THLongStorage_newWithSize(self->nDimension);
  THLongStorage_rawCopy(stride, self->stride);
  return stride;
}
THLongStorage *THZTensor_(newSizeOf)(THZTensor *self)
{
  THLongStorage *size = THLongStorage_newWithSize(self->nDimension);
  THLongStorage_rawCopy(size, self->size);
  return size;
}
THLongStorage *THCSTensor_(newSizeOf)(THCState *state, THCSTensor *self)
{
  THLongStorage *size = THLongStorage_newWithSize(self->nDimensionI + self->nDimensionV);
  THLongStorage_rawCopy(size, self->size);
  return size;
}
THLongStorage *THTensor_(newSizeOf)(THTensor *self)
{
  THLongStorage *size = THLongStorage_newWithSize(self->_dim());
  THLongStorage_rawCopy(size, self->size);
  return size;
}
THLongStorage *THTensor_(newStrideOf)(THTensor *self)
{
  THLongStorage *stride = THLongStorage_newWithSize(self->_dim());
  THLongStorage_rawCopy(stride, self->stride);
  return stride;
}
static void load_array_to_lua(lua_State *L, cnpy::NpyArray& arr){
  int ndims = arr.shape.size();

  //based on code from mattorch with stride fix
  int k;
  THLongStorage *size = THLongStorage_newWithSize(ndims);
  THLongStorage *stride = THLongStorage_newWithSize(ndims);
  for (k=0; k<ndims; k++) {
    THLongStorage_set(size, k, arr.shape[k]);
    if (k > 0)
      THLongStorage_set(stride, ndims-k-1, arr.shape[ndims-k]*THLongStorage_get(stride,ndims-k));
    else
      THLongStorage_set(stride, ndims-k-1, 1);
  }

  void *tensorDataPtr = NULL;
  size_t numBytes = 0;

  if ( arr.arrayType == 'f' ){ // float32/64
    if ( arr.word_size == 4 ){ //float32
      THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THFloatTensor_data(tensor));
      numBytes = THFloatTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor"));
    }else if ( arr.word_size == 8 ){ //float64
      THDoubleTensor *tensor = THDoubleTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THDoubleTensor_data(tensor));
      numBytes = THDoubleTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.DoubleTensor"));
    }
  }else if ( arr.arrayType == 'i' || arr.arrayType == 'u' ){
    // does torch have unsigned types? need to check
    if ( arr.word_size == 1 ){ //int8
      THByteTensor *tensor = THByteTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THByteTensor_data(tensor));
      numBytes = THByteTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ByteTensor"));
    }else if ( arr.word_size == 2 ){ //int16
      THShortTensor *tensor = THShortTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THShortTensor_data(tensor));
      numBytes = THShortTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ShortTensor"));
    }else if ( arr.word_size == 4 ){ //int32
      THIntTensor *tensor = THIntTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THIntTensor_data(tensor));
      numBytes = THIntTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.IntTensor"));
    }else if ( arr.word_size == 8 ){ //int64
      THLongTensor *tensor = THLongTensor_newWithSize(size, stride);
      tensorDataPtr = (void *)(THLongTensor_data(tensor));
      numBytes = THLongTensor_nElement(tensor) * arr.word_size;
      luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.LongTensor"));
    }
  }else{
    printf("array type unsupported\n");
    throw std::runtime_error("unsupported data type");
  }

  // newWithSize copies the sizes; free our storages to avoid a leak
  THLongStorage_free(size);
  THLongStorage_free(stride);

  // now copy the data
  assert(tensorDataPtr);
  memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes);
}
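/*
 * Standalone sketch of the stride computation above: for a row-major
 * (C-ordered) array, stride[d] is the product of all sizes after dimension d,
 * with the innermost dimension contiguous. Illustrative only; no TH
 * dependencies.
 */
#include <stdio.h>

static void row_major_strides(const long *shape, int ndims, long *stride)
{
  stride[ndims-1] = 1;                     /* innermost dimension is contiguous */
  for(int d = ndims-2; d >= 0; d--)
    stride[d] = stride[d+1] * shape[d+1];  /* same recurrence as the loop above */
}

int main(void)
{
  long shape[3] = { 2, 3, 4 }, stride[3];
  row_major_strides(shape, 3, stride);
  printf("%ld %ld %ld\n", stride[0], stride[1], stride[2]);  /* 12 4 1 */
  return 0;
}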
static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  otime   = (itime   - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth  = (iwidth  - kW) / dW + 1;

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    /* indices will contain ti,i,j locations for each output point */
    THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
                                                 indices_data+nslices*otime*owidth*oheight*2,
                                                 indices_data+nslices*otime*owidth*oheight,
                                                 indices_data,
                                                 nslices,
                                                 itime, iwidth, iheight,
                                                 otime, owidth, oheight,
                                                 kT, kW, kH, dT, dW, dH);
  }
  else /* batch mode */
  {
    long p;
    long nBatch = input->size[0];

    long istride = nslices*itime*iwidth*iheight;
    long ostride = nslices*otime*owidth*oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    /* indices will contain ti,i,j locations for each output point */
    THLongStorage* size = THLongStorage_newWithSize(6);
    size->data[0] = 3;       size->data[1] = nBatch;
    size->data[2] = nslices; size->data[3] = otime;
    size->data[4] = oheight; size->data[5] = owidth;
    THTensor_(resize)(indices, size, NULL); /* resize6d not available */
    //TODO: Replace with resize6d when available
    //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++)
    {
      nn_(VolumetricMaxPooling_updateOutput_frame)(
        input_data+p*istride,
        output_data+p*ostride,
        indices_data+(p+nBatch+nBatch)*ostride,
        indices_data+(p+nBatch)*ostride,
        indices_data+p*ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
    THLongStorage_free(size);
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}
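/*
 * Standalone check of the valid-pooling output-size arithmetic used above,
 * o = (i - k)/d + 1, applied per dimension (time, height, width).
 * Illustrative only; no TH dependencies.
 */
#include <stdio.h>

static long pooled_extent(long input, long kernel, long step)
{
  return (input - kernel) / step + 1;
}

int main(void)
{
  printf("%ld %ld %ld\n",
         pooled_extent(16, 2, 2),   /* 8: e.g. itime=16, kT=2, dT=2 */
         pooled_extent(7, 3, 1),    /* 5 */
         pooled_extent(7, 3, 2));   /* 3: trailing partial window dropped */
  return 0;
}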
THLongStorage *THClTensor_newSizeOf(THClState *state, THClTensor *self)
{
  THLongStorage *size = THLongStorage_newWithSize(self->nDimension);
  THLongStorage_rawCopy(size, self->size);
  return size;
}