コード例 #1
0
ファイル: roi_align.c プロジェクト: Dtean/faster-rcnn.pytorch
int roi_align_forward(int aligned_height, int aligned_width, float spatial_scale,
                     THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
{
    /* Raw data pointers for the feature map, the ROI list and the output. */
    float * features_data = THFloatTensor_data(features);
    float * rois_data = THFloatTensor_data(rois);
    float * output_data = THFloatTensor_data(output);

    /* Each ROI row is expected to carry 5 values; bail out otherwise. */
    int roi_count = THFloatTensor_size(rois, 0);
    int roi_cols = THFloatTensor_size(rois, 1);
    if (roi_cols != 5)
        return 0;

    /* Feature-map geometry (NCHW layout: dim 1 = channels, 2 = H, 3 = W). */
    int channels = THFloatTensor_size(features, 1);
    int height = THFloatTensor_size(features, 2);
    int width = THFloatTensor_size(features, 3);

    /* Delegate the actual ROI-Align computation to the CPU kernel. */
    ROIAlignForwardCpu(features_data, spatial_scale, roi_count, height, width,
            channels, aligned_height, aligned_width, rois_data, output_data);

    return 1;
}
コード例 #2
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
void THFloatTensor_convmm(THFloatTensor *r, float beta, float alpha, THFloatTensor *filt, THFloatTensor *m,
	int kH, int kW, int dH, int dW, int padH, int padW)
{
	/* Run a 2D convolution expressed as a single GEMM call:
	   r = beta*r + alpha * filt x m, with the kernel geometry and the
	   input/output strides passed alongside so the implementation can do
	   the im2col addressing implicitly. */
	struct sgemmargs a;

	/* Plain (non-transposed) product. */
	a.transa = 0;
	a.transb = 0;
	/* Matrix dimensions derived from the output and filter tensors. */
	a.m = r->size[1] * r->size[2];
	a.n = r->size[0];
	a.k = filt->size[1];
	a.alpha = alpha;
	a.beta = beta;
	/* Leading dimensions come straight from the tensor strides. */
	a.lda = m->stride[0];
	a.ldb = filt->stride[0];
	a.ldc = r->stride[0];
	/* Raw data pointers. */
	a.a = THFloatTensor_data(m);
	a.b = THFloatTensor_data(filt);
	a.c = THFloatTensor_data(r);
	/* Kernel footprint and row length. */
	a.ks0 = kH * kW;
	a.ks1 = kW;
	/* Input strides/height and output strides. */
	a.is0 = m->stride[0];
	a.is1 = m->stride[1];
	a.ih = m->size[1];
	a.os0 = r->stride[0];
	a.os1 = r->stride[1];
	/* Convolution step and padding. */
	a.dW = dW;
	a.dH = dH;
	a.padW = padW;
	a.padH = padH;
	sgemmargs(&a);
}
コード例 #3
0
ファイル: Tensor.c プロジェクト: IoannisAntonoglou/cutorch
static int cuda_FloatTensor_fakecopy(lua_State *L)
{
  /* Lua args: (dst, src), both torch.FloatTensor userdata.  Copies src
     into dst element-wise via the CUDA copy kernel and returns dst. */
  THFloatTensor *dst = luaT_checkudata(L, 1, "torch.FloatTensor");
  THFloatTensor *src = luaT_checkudata(L, 2, "torch.FloatTensor");
  long *dst_sz, *dst_st, *src_sz, *src_st;
  long n = THFloatTensor_nElement(dst);

  /* Both tensors must hold the same number of elements. */
  THArgCheck(THFloatTensor_nElement(dst) == THFloatTensor_nElement(src), 2, "sizes do not match");

  /* Build the size/stride descriptor arrays for each tensor. */
  THFloatTensor_computesz(dst, &dst_sz, &dst_st);
  THFloatTensor_computesz(src, &src_sz, &src_st);

  THFloatTensor_kernel_copy(THFloatTensor_data(dst),
                            dst_sz, dst_st, dst->nDimension,
                            THFloatTensor_data(src),
                            src_sz, src_st, src->nDimension,
                            n);

  /* Descriptor arrays were allocated by computesz; release them here. */
  THFree(dst_sz);
  THFree(dst_st);
  THFree(src_sz);
  THFree(src_st);

  /* Leave only the destination tensor on the stack and return it. */
  lua_settop(L, 1);
  return 1;
}
コード例 #4
0
ファイル: roi_align.c プロジェクト: Dtean/faster-rcnn.pytorch
int roi_align_backward(int aligned_height, int aligned_width, float spatial_scale,
                       THFloatTensor * top_grad, THFloatTensor * rois, THFloatTensor * bottom_grad)
{
    /* Raw data pointers for the incoming gradient, the ROI list and the
       gradient buffer to fill. */
    float * top_grad_data = THFloatTensor_data(top_grad);
    float * rois_data = THFloatTensor_data(rois);
    float * bottom_grad_data = THFloatTensor_data(bottom_grad);

    /* Each ROI row is expected to carry 5 values; bail out otherwise. */
    int roi_count = THFloatTensor_size(rois, 0);
    int roi_cols = THFloatTensor_size(rois, 1);
    if (roi_cols != 5)
        return 0;

    /* Bottom-gradient geometry (NCHW: dim 1 = channels, 2 = H, 3 = W). */
    int channels = THFloatTensor_size(bottom_grad, 1);
    int height = THFloatTensor_size(bottom_grad, 2);
    int width = THFloatTensor_size(bottom_grad, 3);

    /* Delegate the gradient accumulation to the CPU kernel. */
    ROIAlignBackwardCpu(top_grad_data, spatial_scale, roi_count, height,
            width, channels, aligned_height, aligned_width, rois_data, bottom_grad_data);

    return 1;
}
コード例 #5
0
THFloatTensor *cudnn_SpatialMaxPooling_updateOutput(struct module *module, THFloatTensor *input)
{
	/* Pull the pooling hyper-parameters out of the module description. */
	int kW = module->SpatialMaxPooling.kW;
	int kH = module->SpatialMaxPooling.kH;
	int dW = module->SpatialMaxPooling.dW;
	int dH = module->SpatialMaxPooling.dH;
	int padW = module->SpatialMaxPooling.padW;
	int padH = module->SpatialMaxPooling.padH;

	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t in_desc, out_desc;
	cudnnPoolingDescriptor_t pool_desc;
	/* Blending factors for cudnnPoolingForward: out = 1*pool(in) + 0*out. */
	float alpha = 1, beta = 0;
	int out_dims[4];

	/* Build the descriptors, ask cuDNN for the output geometry, and size
	   the output tensor accordingly before running the forward pass. */
	errcheck(THcudnn_TensorDescriptor(&in_desc, input));
	errcheck(cudnnCreatePoolingDescriptor(&pool_desc));
	errcheck(cudnnSetPooling2dDescriptor(pool_desc, CUDNN_POOLING_MAX, kH, kW, padH, padW, dH, dW));
	errcheck(cudnnGetPoolingNdForwardOutputDim(pool_desc, in_desc, 4, out_dims));
	THCudaTensor_resize4d(output, out_dims[0], out_dims[1], out_dims[2], out_dims[3]);
	errcheck(THcudnn_TensorDescriptor(&out_desc, output));

	errcheck(cudnnPoolingForward(THcudnn_getHandle(), pool_desc, &alpha, in_desc, THFloatTensor_data(input), &beta,
		out_desc, THFloatTensor_data(output)));

	/* Release the cuDNN descriptors created above. */
	cudnnDestroyTensorDescriptor(in_desc);
	cudnnDestroyTensorDescriptor(out_desc);
	cudnnDestroyPoolingDescriptor(pool_desc);
	return output;
}
コード例 #6
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
void THFloatTensor_mul(THFloatTensor *r_, THFloatTensor *t, float value)
{
	/* Element-wise scale: r_[i] = t[i] * value, parallelized with OpenMP
	   once the tensor is large enough to amortize the thread overhead. */
	float *src = THFloatTensor_data(t);
	float *dst = THFloatTensor_data(r_);
	long n = THFloatTensor_nElement(t);
	long i;

#pragma omp parallel for if(n > TH_OMP_OVERHEAD_THRESHOLD) private(i)
	for (i = 0; i < n; i++)
		dst[i] = src[i] * value;
}
コード例 #7
0
ファイル: smrdist.c プロジェクト: Aysegul/smr-tracker
static int dist_smr(lua_State * L) 
{
   // Similarity matching ratio (SMR) between an input image and a kernel,
   // evaluated over the window [begin_x,end_x) x [begin_y,end_y).
   // Lua args: output tensor, input tensor, kernel tensor, dynamic range,
   // begin_x, end_x, begin_y, end_y.  Writes scores into `output` in place.
   const void* torch_FloatTensor_id = luaT_checktypename2id(L, "torch.FloatTensor");
   THFloatTensor *output_ptr = luaT_checkudata(L, 1, torch_FloatTensor_id);
   THFloatTensor *input_ptr = luaT_checkudata(L, 2, torch_FloatTensor_id);
   THFloatTensor *kernel_ptr = luaT_checkudata(L, 3, torch_FloatTensor_id);
   float dynamic = lua_tonumber(L, 4);
   int begin_x = lua_tonumber(L, 5);
   int end_x   = lua_tonumber(L, 6);
   int begin_y = lua_tonumber(L, 7);
   int end_y   = lua_tonumber(L, 8);

   // get raw pointers
   float *output = THFloatTensor_data(output_ptr);
   float *input = THFloatTensor_data(input_ptr);
   float *kernel = THFloatTensor_data(kernel_ptr);

   // kernel dims
   int kheight = kernel_ptr->size[0];
   int kwidth  = kernel_ptr->size[1];

   // strides
   long *is = input_ptr->stride;
   long *ks = kernel_ptr->stride;
   long *os = output_ptr->stride;

   // For every window position, accumulate exp(-2*d) over kernel pixels
   // whose absolute difference d stays under half the dynamic range.
   int i, j, x, y, pos;
   float probability;
   float distance;
   for(y = begin_y; y < end_y; y++) {
      for(x = begin_x; x < end_x; x++) {
        
         pos = y*is[0]+x*is[1];
         probability = 0;
         for(j=0; j< kheight; j++) {
            for(i=0; i< kwidth; i++) {
               // BUGFIX: the original used integer abs() on a float
               // difference, which truncated the value toward zero before
               // taking the magnitude (e.g. abs(0.9f) == 0).  fabsf keeps
               // the fractional part.
               distance = fabsf(input[ pos+j*is[0]+i*is[1] ] - kernel[ j*ks[0]+i*ks[1] ]);
               if (distance<dynamic/2)
                  probability = probability + exp(-2*distance);
              }
         }
         output[y*os[0]+x*os[1]] = probability;
      }
   }
   // No Lua return values: results live in the output tensor.  (The
   // original pushed an empty, unused table here; removed as dead code.)
   return 0;
}
コード例 #8
0
ファイル: opencv.c プロジェクト: jtbates/lua---camera
// frame grabber
static int l_grabFrame (lua_State *L) {
  // Get Tensor's Info
  const int idx = lua_tonumber(L, 1);
  THFloatTensor * tensor = luaT_checkudata(L, 2, luaT_checktypename2id(L, "torch.FloatTensor"));

  // grab frame
  frame[idx] = cvQueryFrame ( capture[idx] );
  if( !frame[idx] ) {
    perror("could not query OpenCV capture");
  }

  // resize given tensor
  THFloatTensor_resize3d(tensor, 3, frame[idx]->height, frame[idx]->width);

  // copy to tensor
  int m0 = tensor->stride[1];
  int m1 = tensor->stride[2];
  int m2 = tensor->stride[0];
  unsigned char *src = frame[idx]->imageData;
  float *dst = THFloatTensor_data(tensor);
  int i, j, k;
  for (i=0; i < frame[idx]->height; i++) {
    for (j=0, k=0; j < frame[idx]->width; j++, k+=m1) {
      // red:
      dst[k] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 2]/255.;
      // green:
      dst[k+m2] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 1]/255.;
      // blue:
      dst[k+2*m2] = src[i*frame[idx]->widthStep + j*frame[idx]->nChannels + 0]/255.;
    }
    dst += m0;
  }

  return 0;
}
コード例 #9
0
ファイル: htktoth.cpp プロジェクト: simpleoier/cnnsv
// Builds a 2D torch.FloatTensor of shape
// (nsamples) x (samplesize/4 * (2*frm_ext+1)) from an HTK feature array,
// pushes it onto the Lua stack, and copies the raw feature bytes into it.
static void load_array_to_lua(lua_State *L, chtk::htkarray& arr){
	int ndims = 2;

	//based on code from mattorch with stride fix
	int k;
    // Row-major layout: stride is 1 along columns, one row-width along rows.
    THLongStorage *size = THLongStorage_newWithSize(ndims);
    THLongStorage *stride = THLongStorage_newWithSize(ndims);
    THLongStorage_set(size,0 , arr.nsamples);
	THLongStorage_set(size,1,arr.samplesize/4*(2*arr.frm_ext+1));
	THLongStorage_set(stride,1,1);
	THLongStorage_set(stride,0,arr.samplesize/4*(2*arr.frm_ext+1));
    void * tensorDataPtr = NULL;
    size_t numBytes = 0;

	// NOTE(review): the size/stride storages created above are never freed
	// here — confirm whether THFloatTensor_newWithSize takes ownership.
	THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride);
    tensorDataPtr = (void *)(THFloatTensor_data(tensor));

    // Hard-coded 4 == sizeof(float); element count times element size.
    numBytes = THFloatTensor_nElement(tensor) * 4;
    // Push the tensor to Lua; Lua's GC owns it from here on.
    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor"));
	// now copy the data
    assert(tensorDataPtr);
	memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes);


}
コード例 #10
0
// Copy extracted patches to CUDA memory and run the network
// One has to keep mind that GPU memory is limited and extracting too many patches
// at once might cause troubles
// So if you need to extract a lot of patches, an efficient way would be to
// devide the set in smaller equal parts and preallocate CPU and GPU memory
// Copy extracted patches to CUDA memory and run the network
// One has to keep mind that GPU memory is limited and extracting too many patches
// at once might cause troubles
// So if you need to extract a lot of patches, an efficient way would be to
// devide the set in smaller equal parts and preallocate CPU and GPU memory
// Runs `net` over `patches` in fixed-size batches and collects the
// per-patch descriptors into the `descriptors` matrix (one row per patch).
// NOTE(review): M appears to be a file-scope patch side length (each patch
// is assumed to be MxM float32, contiguous) — confirm against the caller.
void extractDescriptors(THCState *state,
    cunn::Sequential::Ptr net,
    const std::vector<cv::Mat>& patches,
    cv::Mat& descriptors)
{
  size_t batch_size = 128;
  size_t N = patches.size();

  // Reusable CPU staging buffer and GPU input tensor, both sized for a
  // full batch; the final partial batch simply uses the first k slots.
  THFloatTensor *buffer = THFloatTensor_newWithSize4d(batch_size, 1, M, M);
  THCudaTensor *input = THCudaTensor_newWithSize4d(state, batch_size, 1, M, M);

  for(int j=0; j < ceil((float)N/batch_size); ++j)
  {
    float *data = THFloatTensor_data(buffer);
    // k counts how many patches this batch actually holds (< batch_size
    // on the last iteration).
    size_t k = 0;
    for(size_t i = j*batch_size; i < std::min((j+1)*batch_size, N); ++i, ++k)
      memcpy(data + k*M*M, patches[i].data, sizeof(float) * M * M);

    // initialize 4D CUDA tensor and copy patches into it
    THCudaTensor_copyFloat(state, input, buffer);

    // propagate through the network
    THCudaTensor *output = net->forward(input);

    // copy descriptors back
    THFloatTensor *desc = THFloatTensor_newWithSize2d(output->size[0], output->size[1]);
    THFloatTensor_copyCuda(state, desc, output);

    size_t feature_dim = output->size[1];
    // Allocate the result matrix once the descriptor width is known
    // (first iteration in practice).
    if(descriptors.cols != feature_dim || descriptors.rows != N)
      descriptors.create(N, feature_dim, CV_32F);

    // Copy only the k valid rows of this batch into the output matrix.
    memcpy(descriptors.data + j * feature_dim * batch_size * sizeof(float),
        THFloatTensor_data(desc),
        sizeof(float) * feature_dim * k);

    THFloatTensor_free(desc);
  }

  THCudaTensor_free(state, input);
  THFloatTensor_free(buffer);
}
コード例 #11
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
/* r_ = beta*t + alpha * mat * vec, computed in place (r_ must alias t).
 * Dispatches to BLAS gemv with 'n'/'t' depending on which matrix stride
 * is unit; non-contiguous layouts are not supported. */
void THFloatTensor_addmv(THFloatTensor *r_, float beta, THFloatTensor *t, float alpha, THFloatTensor *mat, THFloatTensor *vec)
{
	if( (mat->nDimension != 2) || (vec->nDimension != 1) )
		THError("matrix and vector expected, got %dD, %dD", mat->nDimension, vec->nDimension);

	/* BUGFIX: the original passed long values to %s conversion specifiers,
	   which is undefined behavior (treats the integers as char pointers).
	   Use %ld to print the mismatching sizes. */
	if( mat->size[1] != vec->size[0] )
		THError("size mismatch, %ld, %ld", mat->size[1], vec->size[0]);

	if(t->nDimension != 1)
		THError("vector expected, got t: %dD", t->nDimension);

	if(t->size[0] != mat->size[0])
		THError("size mismatch, t: %ld, mat: %ld", t->size[0], mat->size[0]);

	/* Only the in-place accumulation form is implemented. */
	if(r_ != t)
		THError("r_ != t not implemented");

	if(mat->stride[0] == 1)
	{
		/* Column-major-contiguous matrix: plain gemv. */
		THBlas_gemv('n', mat->size[0], mat->size[1], alpha, THFloatTensor_data(mat), mat->stride[1],
			THFloatTensor_data(vec), vec->stride[0], beta, THFloatTensor_data(r_), r_->stride[0]);
	}
	else if(mat->stride[1] == 1)
	{
		/* Row-major-contiguous matrix: transposed gemv with swapped dims. */
		THBlas_gemv('t',  mat->size[1], mat->size[0], alpha, THFloatTensor_data(mat), mat->stride[0],
			THFloatTensor_data(vec), vec->stride[0], beta, THFloatTensor_data(r_), r_->stride[0]);
	}
	else THError("addmv for non-contiguous not implemented");
}
コード例 #12
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
/* r_ = beta*r_ + alpha * vec1 (outer) vec2, in place (r_ must alias t).
 * The beta scaling is applied first via THFloatTensor_mul, then the rank-1
 * update is delegated to BLAS ger.  ger assumes column-major storage, so
 * whichever of r_'s strides is unit decides which vector plays the "row"
 * role — hence the vec1/vec2 swap in the second branch. */
void THFloatTensor_addr(THFloatTensor *r_, float beta, THFloatTensor *t, float alpha, THFloatTensor *vec1, THFloatTensor *vec2)
{
	if( (vec1->nDimension != 1) || (vec2->nDimension != 1) )
		THError("vector and vector expected, got %dD, %dD tensors", vec1->nDimension, vec2->nDimension);

	if(t->nDimension != 2)
		THError("expected matrix, got %dD tensor for t", t->nDimension);

	/* t must be vec1-size x vec2-size. */
	if( (t->size[0] != vec1->size[0]) || (t->size[1] != vec2->size[0]) )
		THError("size mismatch, t: %ld, vec1: %ld, t: %ld, vec2: %ld", t->size[0], vec1->size[0], t->size[1], vec2->size[0]);

	if(r_ != t)
		THError("r_ != t not implemented");

	/* Pre-scale the accumulator; ger itself only adds alpha*x*y'. */
	if(beta != 1)
		THFloatTensor_mul(r_, r_, beta);

  if(r_->stride[0] == 1)
  {
    /* Column-contiguous result: straight ger. */
    THBlas_ger(vec1->size[0], vec2->size[0],
                 alpha, THFloatTensor_data(vec1), vec1->stride[0],
                 THFloatTensor_data(vec2), vec2->stride[0],
                 THFloatTensor_data(r_), r_->stride[1]);
  }
  else if(r_->stride[1] == 1)
  {
    /* Row-contiguous result: compute the transposed update by swapping
       the two vectors. */
    THBlas_ger(vec2->size[0], vec1->size[0],
                 alpha, THFloatTensor_data(vec2), vec2->stride[0],
                 THFloatTensor_data(vec1), vec1->stride[0],
                 THFloatTensor_data(r_), r_->stride[0]);
  }
  else THError("addr for non-contiguous not implemented");
}
コード例 #13
0
ファイル: cudnn_Threshold.c プロジェクト: Aerobota/thnets
THFloatTensor *cudnn_Threshold_updateOutput(struct module *module, THFloatTensor *input)
{
	/* ReLU forward pass via cuDNN.  With inplace set, the output tensor is
	   aliased onto the input; otherwise it is resized to match. */
	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t in_desc, out_desc;
	int inplace = module->Threshold.inplace;
	/* Blending factors: out = 1*relu(in) + 0*out. */
	float alpha = 1, beta = 0;

	errcheck(THcudnn_TensorDescriptor(&in_desc, input));
	if(inplace)
		THFloatTensor_set(output, input);
	else THCudaTensor_resize4d(output, input->size[0], input->size[1], input->size[2], input->size[3]);
	errcheck(THcudnn_TensorDescriptor(&out_desc, output));

	errcheck(cudnnActivationForward(THcudnn_getHandle(), CUDNN_ACTIVATION_RELU, &alpha, in_desc, THFloatTensor_data(input), &beta,
		out_desc, THFloatTensor_data(output)));

	/* Release the cuDNN descriptors created above. */
	cudnnDestroyTensorDescriptor(in_desc);
	cudnnDestroyTensorDescriptor(out_desc);
	return output;
}
コード例 #14
0
    /* Scans a 2D torch.FloatTensor (Lua arg 1) for values above a
     * threshold (arg 2) and appends one {x, y, scale} entry per hit to
     * the blobs table (arg 3); arg 4 is the scale recorded in each entry.
     * NOTE(review): the function returns 1 but pushes no explicit result;
     * the value returned to Lua is whatever sits on top of the stack —
     * confirm the intended return against the Lua-side caller. */
    static int parse(lua_State *L)
    {
        const char* id = luaT_typenameid(L, "torch.FloatTensor"); //Get float
        THFloatTensor *tensor = (THFloatTensor*) luaT_checkudata(L, 1, id); //Check if float
        float *input_data = THFloatTensor_data(tensor); //Pointer to tensor region

        float threshold = lua_tonumber(L, 2); //Threshold sent by lua
        int table_blobs = 3;
        // Append after the table's current last element.
        int idx = lua_objlen(L, 3) + 1;
        float scale = lua_tonumber(L, 4);  //Which scale was this called for?

        // loop over pixels
        // Dim 0 is treated as y (rows), dim 1 as x (columns).
        int x,y;
        for (y=0; y<tensor->size[0]; y++) {
            for (x=0; x<tensor->size[1]; x++) {
                float val = THFloatTensor_get2d(tensor, y, x);
                if (val > threshold) {
                    // entry = {}
                    lua_newtable(L);
                    int entry = lua_gettop(L);

                    // entry[1] = x
                    lua_pushnumber(L, x);
                    lua_rawseti(L, entry, 1);

                    // entry[2] = y
                    lua_pushnumber(L, y);
                    lua_rawseti(L, entry, 2);

                    // entry[3] = scale
                    lua_pushnumber(L, scale);
                    lua_rawseti(L, entry, 3);

                    // blobs[idx] = entry; idx = idx + 1
                    // (rawseti also pops the entry table off the stack)
                    lua_rawseti(L, table_blobs, idx++);
                }
            }
        }
        return 1;
    }
コード例 #15
0
ファイル: npy4th.cpp プロジェクト: gcr/npy4th
// Converts a cnpy NpyArray into the matching Torch tensor type
// (float32/64, int8/16/32/64 by arrayType and word_size), pushes the
// tensor onto the Lua stack, and copies the raw array bytes into it.
// Throws std::runtime_error for unsupported dtypes.
static void load_array_to_lua(lua_State *L, cnpy::NpyArray& arr){
	int ndims = arr.shape.size();

	//based on code from mattorch with stride fix
	// Strides are built innermost-first: the last dimension gets stride 1,
	// and each earlier dimension's stride is the product of the shapes of
	// all later dimensions (row-major / C order).
	int k;
	THLongStorage *size = THLongStorage_newWithSize(ndims);
	THLongStorage *stride = THLongStorage_newWithSize(ndims);
	for (k=0; k<ndims; k++) {
		THLongStorage_set(size, k, arr.shape[k]);
		if (k > 0)
			THLongStorage_set(stride, ndims-k-1, arr.shape[ndims-k]*THLongStorage_get(stride,ndims-k));
		else
			THLongStorage_set(stride, ndims-k-1, 1);
	}

	// Filled in by whichever branch below matches the dtype.
	void * tensorDataPtr = NULL;
	size_t numBytes = 0;

	// NOTE(review): the size/stride storages are never freed in this
	// function — confirm whether the *_newWithSize calls take ownership.
	if ( arr.arrayType == 'f' ){ // float32/64
		if ( arr.word_size == 4 ){ //float32
			THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride);
		    tensorDataPtr = (void *)(THFloatTensor_data(tensor));
		    numBytes = THFloatTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor"));
    
		}else if ( arr.word_size ==  8){ //float 64
			THDoubleTensor *tensor = THDoubleTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THDoubleTensor_data(tensor));
		    numBytes = THDoubleTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.DoubleTensor"));
		}
	}else if ( arr.arrayType == 'i' || arr.arrayType == 'u' ){ // does torch have unsigned types .. need to look
		if ( arr.word_size == 1 ){ //int8
			THByteTensor *tensor = THByteTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THByteTensor_data(tensor));
		    numBytes = THByteTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ByteTensor"));
    
		}else if ( arr.word_size == 2 ){ //int16
			THShortTensor *tensor = THShortTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THShortTensor_data(tensor));
		    numBytes = THShortTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ShortTensor"));
    
		}else if ( arr.word_size == 4 ){ //int32
			THIntTensor *tensor = THIntTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THIntTensor_data(tensor));
		    numBytes = THIntTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.IntTensor"));
    
		}else if ( arr.word_size ==  8){ //long 64
			THLongTensor *tensor = THLongTensor_newWithSize(size, stride);
			tensorDataPtr = (void *)(THLongTensor_data(tensor));
		    numBytes = THLongTensor_nElement(tensor) * arr.word_size;
		    luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.LongTensor"));
		}
	}else{
		printf("array type unsupported");
		throw std::runtime_error("unsupported data type");
	}

		// now copy the data
		assert(tensorDataPtr);
		memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes);


}
コード例 #16
0
    /* JNI entry point: feeds one NV21 camera frame (Y plane only) through a
     * Lua/Torch face-detection network, draws red boxes around the surviving
     * detections directly into outPixels (ARGB), and returns the network's
     * self-reported runtime.
     * NOTE(review): the frame geometry is hard-coded to 1280x768 here even
     * though width/height are passed in — confirm against the Java caller. */
    JNIEXPORT float JNICALL
    Java_com_torchandroid_facedemo_CameraClass_callTorch(JNIEnv *env, jobject thiz, jlong torchStateLocation,
            jint width, jint height, jbyteArray NV21FrameData, jintArray outPixels) {

        lua_State *L = (lua_State*) torchStateLocation;

        float netProfiler = 0;

        THFloatTensor *testTensor = THFloatTensor_newWithSize1d(1280*768);  //Initialize 1D tensor.
        jbyte *testTensor_data; //Pointer to the java byte data from the camera.
        testTensor_data = (env)->GetByteArrayElements(NV21FrameData,0); //Get pointer to java byte array region
        int imSize = 1280*768; //Define number of pixels

        jfloat *poutPixels = THFloatTensor_data(testTensor); //Raw float storage of the torch tensor
        jint *output = env->GetIntArrayElements(outPixels, 0); //Get java int array region for output

        //This loop ignores U and V channels. Network doesn't use them
        //Cam data comes like so - YYYYYY ... imSize times.... YYYYY UVUVUVUVUVUV.... <- ignore these
        for(int i = 0; i < imSize; i++)
        {
            output[i] = 0;
            poutPixels[i] = testTensor_data[i] & 0xFF;
        }

        int tableSize = 0; //Holds number of detections

        // BUGFIX: initialize to NULL so the pointer is never read or freed
        // while unallocated (the original left it indeterminate).
        int *fill = NULL; //Holds x,y,w,h per detection once allocated

        lua_getglobal(L,"getDetections");

        lua_getglobal(L,"network");
        luaT_pushudata(L,testTensor,"torch.FloatTensor"); //Push tensor to lua stack
        lua_pushnumber(L,width);
        lua_pushnumber(L,height);
        if(lua_pcall(L,4,3,0) != 0) //Call function. Print error if call not successful
            __android_log_print(ANDROID_LOG_INFO, "Torchandroid", "Error running function: %s",lua_tostring(L, -1));

        else {

            netProfiler = (float) lua_tonumber(L,-1);
            lua_pop(L,1);

            tableSize = lua_tointeger(L,-1);  //Get #detections from stack
            lua_pop(L,1);
            if(tableSize != 0)  //Extract x,y,w,h for each detection
            {
                fill = (int*) malloc(4*tableSize*sizeof(int)); //Holds detections
                PrintTable(L,tableSize,fill);

            }
        }

        // Non-maximum suppression: zero out the width of any detection whose
        // center lies inside an earlier detection's box.
        if(tableSize != 0)
        {
            int center[2] = {0};
            for(int i = 0; i < 4*tableSize; i+=4)
            {
                int x = fill[i];
                int y = fill[i+1];
                int w = fill[i+2];
                int h = fill[i+3];

                for(int j = i+4; j < 4*tableSize; j+=4)
                {
                    center[0] = fill[j]+fill[j+2]*0.5; //x center
                    center[1] = fill[j+1]+fill[j+3]*0.5; //y center
                    if(((center[0] <= (x+w)) && (center[0] >= x)) && ((center[1] <= (y+h)) && (center[1] >= y)))
                    {
                        fill[j+2] = 0;
                    }
                }

            }
        }

        // Draw a 3-pixel-thick red (0xFFFF0000) rectangle for each surviving
        // detection.  (The original also declared unused tempnum2/tempnum3
        // here that were immediately shadowed below; removed.)
        if(tableSize != 0)
        {
            int jlim = 0; //Define to prevent computation of loop control for each iteration. Efficiency FTW
            int tempnum1 = 0;

            for(int i = 0; i < 4*tableSize; i+=4)
            {

                if(fill[i+2] == 0) //Suppressed above — skip
                    continue;

                int x = fill[i];
                int y = fill[i+1];
                int w = fill[i+2];
                int h = fill[i+3];

                __android_log_print(ANDROID_LOG_INFO, "Torchandroid", "x = %u y = %u w = %u h = %u",x,y,w,h);


                jlim = ((y-1)*1280+x+w);
                tempnum1 = 1280*(h-1);
                //Assign output pixels red color. 4 byte - ARGB. x,y from network in 2D. Convert to 1 D
                for(int j = (y-1)*1280+x; j < jlim; j++)  //This loop does top and bottom lines of box
                {
                    output[j-1280] = 0xFFFF0000;
                    output[j+1280] = 0xFFFF0000;
                    output[j] = 0xFFFF0000;
                    output[j+tempnum1] = 0xFFFF0000;
                    output[j+tempnum1-1280] = 0xFFFF0000;
                    output[j+tempnum1+1280] = 0xFFFF0000;
                }

                jlim = (((y-1)*1280+x)+((1280*h)+w));
                for(int j = (y-1)*1280+x; j < jlim; j+=1280) //This loop does left and right of box
                {
                    output[j+1] = 0xFFFF0000;
                    output[j-1] = 0xFFFF0000;
                    output[j] = 0xFFFF0000;
                    output[j+w] = 0xFFFF0000;
                    output[j+w+1] = 0xFFFF0000;
                    output[j+w-1] = 0xFFFF0000;
                }
            }
        }

        // BUGFIX: the detection buffer was leaked on every call that found
        // detections; release it here (free(NULL) is a harmless no-op).
        free(fill);

        env->ReleaseByteArrayElements(NV21FrameData, testTensor_data, 0); //Destroy pointer to location in C. Only need java now
        env->ReleaseIntArrayElements(outPixels, output, 0); //Same as above here
        return netProfiler;
    }
コード例 #17
0
/* im2col: unfolds `input` (nInputPlane x inputHeight x inputWidth) into
 * `finput` so that a convolution with a kH x kW kernel, stride dH x dW and
 * padding padH x padW becomes a single matrix multiply.  Each (plane, kh,
 * kw) combination produces one outputHeight x outputWidth slab in finput;
 * out-of-bounds (padding) positions are zero-filled.  Parallelized over
 * the nInputPlane*kH*kW combinations with OpenMP. */
static void nn_unfolded_copy(THFloatTensor *finput, THFloatTensor *input,
	int kW, int kH, int dW, int dH, int padW, int padH,
	int nInputPlane, int inputWidth, int inputHeight,
	int outputWidth, int outputHeight)
{
	long k;
	float *input_data = THFloatTensor_data(input);
	float *finput_data = THFloatTensor_data(finput);

#pragma omp parallel for private(k)
	for(k = 0; k < nInputPlane*kH*kW; k++) {
		/* Decompose the flat index k into (plane, kernel row, kernel col). */
		long nip = k / (kH*kW);
		long rest = k % (kH*kW);
		long kh = rest / kW;
		long kw = rest % kW;
		long x,y;
		long long ix,iy;
		/* dst: this combination's slab in finput; src: the input plane. */
		float *dst = finput_data + nip*(kH*kW*outputHeight*outputWidth) + kh*(kW*outputHeight*outputWidth) + kw*(outputHeight*outputWidth);
		float *src = input_data + nip*(inputHeight*inputWidth);
		if (padW > 0 || padH > 0) {
			long lpad,rpad;
			for(y = 0; y < outputHeight; y++) {
				/* Source row for this output row; rows entirely inside the
				   vertical padding become all-zero. */
				iy = (long long)(y*dH - padH + kh);
				if (iy < 0 || iy >= inputHeight) {
					memset(dst+y*outputWidth, 0, sizeof(float)*outputWidth);
				} else {
					if (dW==1){
						/* Unit horizontal stride: one contiguous memcpy per
						   row, with explicit zero fills for the left (lpad)
						   and right (rpad) padding margins. */
						ix = (long long)(0 - padW + kw);
						lpad = fmaxf(0,padW-kw);
						rpad = fmaxf(0,padW-(kW-kw-1));
						if (outputWidth-rpad-lpad <= 0) {
							memset(dst+(y*outputWidth), 0, sizeof(float)*outputWidth);
						} else {
							if (lpad > 0) memset(dst+y*outputWidth, 0, sizeof(float)*lpad);
							memcpy(dst+(y*outputWidth+lpad), src+(iy*inputWidth+ix+lpad), sizeof(float)*(outputWidth-rpad-lpad));
							if (rpad > 0) memset(dst+y*outputWidth + outputWidth - rpad, 0, sizeof(float)*rpad);
						}
					}
					else{
						/* Strided horizontal access: copy element by element,
						   zeroing columns that fall in the padding. */
						for (x=0; x<outputWidth; x++){
							ix = (long long)(x*dW - padW + kw);
							if (ix < 0 || ix >= inputWidth)
								memset(dst+(y*outputWidth+x), 0, sizeof(float)*1);
							else
								memcpy(dst+(y*outputWidth+x), src+(iy*inputWidth+ix), sizeof(float)*(1));
						}
					}
				}
			}
		} else {
			/* No padding: every source index is in bounds, so copy rows
			   directly (whole rows when dW == 1, per-element otherwise). */
			for(y = 0; y < outputHeight; y++) {
				iy = (long long)(y*dH + kh);
				ix = (long long)(0 + kw);
				if (dW == 1)
					memcpy(dst+(y*outputWidth), src+(iy*inputWidth+ix), sizeof(float)*outputWidth);
				else{
					for (x=0; x<outputWidth; x++)
						memcpy(dst+(y*outputWidth+x), src+(iy*inputWidth+ix+x*dW), sizeof(float)*(1));
				}
			}
		}
	}
}
コード例 #18
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
/* r_ = beta*t + alpha * m1 * m2, computed in place (t must alias r_).
 * Determines transpose flags for BLAS gemm from the strides of r_, m1 and
 * m2; layouts that would need an intermediate copy are rejected with
 * "not implemented" errors. */
void THFloatTensor_addmm(THFloatTensor *r_, float beta, THFloatTensor *t, float alpha, THFloatTensor *m1, THFloatTensor *m2)
{
	char transpose_r, transpose_m1, transpose_m2;
	THFloatTensor *r__, *m1_, *m2_;

	if( (m1->nDimension != 2) || (m2->nDimension != 2))
		THError("matrices expected, got %dD, %dD tensors", m1->nDimension, m2->nDimension);

	if(m1->size[1] != m2->size[0])
		THError("size mismatch, m1: %ld, m2: %ld", m1->size[1], m2->size[0]);

	if( t->nDimension != 2 )
		THError("matrix expected, got %dD tensor for t", t->nDimension);

	/* BUGFIX: the check compares t->size[0] against m1->size[0], but the
	   original message printed m1->size[1] — report the value actually
	   compared. */
	if( (t->size[0] != m1->size[0]) || (t->size[1] != m2->size[1]) )
		THError("size mismatch, t: %ld, m1: %ld, t: %ld, m2: %ld", t->size[0], m1->size[0], t->size[1], m2->size[1]);

	if(t != r_)
		THError("Not implemented: t != r");

	/*  printf("%ldx%ld = %ldx%ld X %ldx%ld\n", r_->size[0], r_->size[1], m1->size[0], m1->size[1], m2->size[0], m2->size[1]); */

	/* r_: pick 'n' when column-contiguous; when row-contiguous, compute the
	   transposed product instead, which requires swapping m1 and m2. */
	if(r_->stride[0] == 1 && r_->stride[1] != 0)
	{
		transpose_r = 'n';
		r__ = r_;
	}
	else if(r_->stride[1] == 1 && r_->stride[0] != 0)
	{
		THFloatTensor *swap = m2;
		m2 = m1;
		m1 = swap;
		transpose_r = 't';
		r__ = r_;
	}
	else
	{
		THError("Transpose not implemented (1)");
		return;
/*		transpose_r = 'n';

		r__ = THFloatTensor_newWithSize2d(r_->size[1], r_->size[0]);
		THFloatTensor_copy(r__, r_);
		THFloatTensor_transpose(r__, NULL, 0, 1);*/
	}

	/* m1: choose 'n'/'t' relative to the orientation picked for r_. */
	if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
	{
		transpose_m1 = 'n';
		m1_ = m1;
	}
	else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
	{
		transpose_m1 = 't';
		m1_ = m1;
	}
	else
	{
		THError("Transpose not implemented (2)");
		return;
		/*transpose_m1 = (transpose_r == 'n' ? 't' : 'n');
		m1_ = THFloatTensor_newContiguous(m1);*/
	}

	/* m2: same orientation logic as m1. */
	if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 && m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
	{
		transpose_m2 = 'n';
		m2_ = m2;
	}
	else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 && m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
	{
		transpose_m2 = 't';
		m2_ = m2;
	}
	else
	{
		THError("Transpose not implemented (3)");
		return;
		/*transpose_m2 = (transpose_r == 'n' ? 't' : 'n');
		m2_ = THFloatTensor_(newContiguous)(m2);*/
	}

	/* do the operation */
	THBlas_gemm(transpose_m1,
		transpose_m2,
		r__->size[(transpose_r == 'n' ? 0 : 1)],
		r__->size[(transpose_r == 'n' ? 1 : 0)],
		m1_->size[(transpose_r == 'n' ? 1 : 0)],
		alpha,
		THFloatTensor_data(m1_),
		(transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]),
		THFloatTensor_data(m2_),
		(transpose_m2 == 'n' ? m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]),
		beta,
		THFloatTensor_data(r__),
		r__->stride[(transpose_r == 'n' ? 1 : 0)]);

	/* free intermediate variables (no-ops in the currently supported
	   layouts, kept for parity with the full TH implementation) */
	if(m1_ != m1)
		THFloatTensor_free(m1_);

	if(m2_ != m2)
		THFloatTensor_free(m2_);

	if(r__ != r_)
		THError("freeCopyTo not implemented");
		/*THFloatTensor_(freeCopyTo)(r__, r_);*/
}
コード例 #19
0
ファイル: roi_crop.c プロジェクト: Dtean/faster-rcnn.pytorch
/* Bilinear sampling forward pass, BHWD layout: for every output pixel,
 * reads a normalized (y, x) coordinate pair in [-1, 1] from `grids`,
 * maps it into inputImages space, and writes the bilinear interpolation
 * of the four surrounding input pixels into `output` (all channels).
 * Out-of-bounds corner pixels contribute 0.  Always returns 1. */
int BilinearSamplerBHWD_updateOutput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *output)
{

  /* Tensor geometry: dims are (batch, height, width, channels). */
  int batchsize = THFloatTensor_size(inputImages, 0);
  int inputImages_height = THFloatTensor_size(inputImages, 1);
  int inputImages_width = THFloatTensor_size(inputImages, 2);
  int output_height = THFloatTensor_size(output, 1);
  int output_width = THFloatTensor_size(output, 2);
  int inputImages_channels = THFloatTensor_size(inputImages, 3);

  int output_strideBatch = THFloatTensor_stride(output, 0);
  int output_strideHeight = THFloatTensor_stride(output, 1);
  int output_strideWidth = THFloatTensor_stride(output, 2);

  int inputImages_strideBatch = THFloatTensor_stride(inputImages, 0);
  int inputImages_strideHeight = THFloatTensor_stride(inputImages, 1);
  int inputImages_strideWidth = THFloatTensor_stride(inputImages, 2);

  int grids_strideBatch = THFloatTensor_stride(grids, 0);
  int grids_strideHeight = THFloatTensor_stride(grids, 1);
  int grids_strideWidth = THFloatTensor_stride(grids, 2);

  real *inputImages_data, *output_data, *grids_data;
  inputImages_data = THFloatTensor_data(inputImages);
  output_data = THFloatTensor_data(output);
  grids_data = THFloatTensor_data(grids);

  int b, yOut, xOut;

  for(b=0; b < batchsize; b++)
  {
    for(yOut=0; yOut < output_height; yOut++)
    {
      for(xOut=0; xOut < output_width; xOut++)
      {
        //read the grid
        // Grid stores (y, x) pairs in its innermost dimension.
        real yf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth];
        real xf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + 1];

        // get the weights for interpolation
        int yInTopLeft, xInTopLeft;
        real yWeightTopLeft, xWeightTopLeft;

        // Map the normalized coordinate in [-1, 1] to pixel space
        // [0, size-1]; the fractional part gives the blend weights.
        real xcoord = (xf + 1) * (inputImages_width - 1) / 2;
        xInTopLeft = floor(xcoord);
        xWeightTopLeft = 1 - (xcoord - xInTopLeft);

        real ycoord = (yf + 1) * (inputImages_height - 1) / 2;
        yInTopLeft = floor(ycoord);
        yWeightTopLeft = 1 - (ycoord - yInTopLeft);



        // Flat addresses of the output pixel and the 2x2 input neighborhood.
        const int outAddress = output_strideBatch * b + output_strideHeight * yOut + output_strideWidth * xOut;
        const int inTopLeftAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeft + inputImages_strideWidth * xInTopLeft;
        const int inTopRightAddress = inTopLeftAddress + inputImages_strideWidth;
        const int inBottomLeftAddress = inTopLeftAddress + inputImages_strideHeight;
        const int inBottomRightAddress = inBottomLeftAddress + inputImages_strideWidth;

        real v=0;
        real inTopLeft=0;
        real inTopRight=0;
        real inBottomLeft=0;
        real inBottomRight=0;

        // we are careful with the boundaries
        bool topLeftIsIn = xInTopLeft >= 0 && xInTopLeft <= inputImages_width-1 && yInTopLeft >= 0 && yInTopLeft <= inputImages_height-1;
        bool topRightIsIn = xInTopLeft+1 >= 0 && xInTopLeft+1 <= inputImages_width-1 && yInTopLeft >= 0 && yInTopLeft <= inputImages_height-1;
        bool bottomLeftIsIn = xInTopLeft >= 0 && xInTopLeft <= inputImages_width-1 && yInTopLeft+1 >= 0 && yInTopLeft+1 <= inputImages_height-1;
        bool bottomRightIsIn = xInTopLeft+1 >= 0 && xInTopLeft+1 <= inputImages_width-1 && yInTopLeft+1 >= 0 && yInTopLeft+1 <= inputImages_height-1;

        int t;
        // interpolation happens here
        // Out-of-bounds corners keep their zero initialization.
        for(t=0; t<inputImages_channels; t++)
        {
           if(topLeftIsIn) inTopLeft = inputImages_data[inTopLeftAddress + t];
           if(topRightIsIn) inTopRight = inputImages_data[inTopRightAddress + t];
           if(bottomLeftIsIn) inBottomLeft = inputImages_data[inBottomLeftAddress + t];
           if(bottomRightIsIn) inBottomRight = inputImages_data[inBottomRightAddress + t];

           v = xWeightTopLeft * yWeightTopLeft * inTopLeft
             + (1 - xWeightTopLeft) * yWeightTopLeft * inTopRight
             + xWeightTopLeft * (1 - yWeightTopLeft) * inBottomLeft
             + (1 - xWeightTopLeft) * (1 - yWeightTopLeft) * inBottomRight;

           output_data[outAddress + t] = v;
        }

      }
    }
  }

  return 1;
}
コード例 #20
0
ファイル: roi_crop.c プロジェクト: Dtean/faster-rcnn.pytorch
// Backward pass of a bilinear sampler in BCHW layout (CPU).
// Given gradOutput (dL/d output), accumulates:
//   - gradInputImages: gradient w.r.t. the sampled input images, and
//   - gradGrids:       gradient w.r.t. the normalized sampling grid.
// Grid channel layout (established by the reads below): channel 0 holds the
// y coordinate, channel 1 holds the x coordinate, both normalized to [-1, 1].
// Returns 1 on success.
// NOTE(review): gradInputImages and gradGrids are accumulated into (+=) /
// overwritten respectively — the caller presumably zeroes them first; verify.
int BilinearSamplerBCHW_updateGradInput(THFloatTensor *inputImages, THFloatTensor *grids, THFloatTensor *gradInputImages,
                                        THFloatTensor *gradGrids, THFloatTensor *gradOutput)
{
  // Hard-coded false: the image gradient is always computed in this build.
  bool onlyGrid=false;

  int batchsize = THFloatTensor_size(inputImages, 0);
  int inputImages_height = THFloatTensor_size(inputImages, 2);
  int inputImages_width = THFloatTensor_size(inputImages, 3);
  int gradOutput_height = THFloatTensor_size(gradOutput, 2);
  int gradOutput_width = THFloatTensor_size(gradOutput, 3);
  int inputImages_channels = THFloatTensor_size(inputImages, 1);

  // Strides are fetched per-tensor so the code works on non-contiguous views.
  int gradOutput_strideBatch = THFloatTensor_stride(gradOutput, 0);
  int gradOutput_strideHeight = THFloatTensor_stride(gradOutput, 2);
  int gradOutput_strideWidth = THFloatTensor_stride(gradOutput, 3);
  int gradOutput_strideChannel = THFloatTensor_stride(gradOutput, 1);

  int inputImages_strideBatch = THFloatTensor_stride(inputImages, 0);
  int inputImages_strideHeight = THFloatTensor_stride(inputImages, 2);
  int inputImages_strideWidth = THFloatTensor_stride(inputImages, 3);
  int inputImages_strideChannel = THFloatTensor_stride(inputImages, 1);
    
  int gradInputImages_strideBatch = THFloatTensor_stride(gradInputImages, 0);
  int gradInputImages_strideHeight = THFloatTensor_stride(gradInputImages, 2);
  int gradInputImages_strideWidth = THFloatTensor_stride(gradInputImages, 3);
  int gradInputImages_strideChannel = THFloatTensor_stride(gradInputImages, 1);

  int grids_strideBatch = THFloatTensor_stride(grids, 0);
  int grids_strideHeight = THFloatTensor_stride(grids, 2);
  int grids_strideWidth = THFloatTensor_stride(grids, 3);
  int grids_strideChannel = THFloatTensor_stride(grids, 1);

  int gradGrids_strideBatch = THFloatTensor_stride(gradGrids, 0);
  int gradGrids_strideHeight = THFloatTensor_stride(gradGrids, 2);
  int gradGrids_strideWidth = THFloatTensor_stride(gradGrids, 3);
  int gradGrids_strideChannel = THFloatTensor_stride(gradGrids, 1);

  real *inputImages_data, *gradOutput_data, *grids_data, *gradGrids_data, *gradInputImages_data;
  inputImages_data = THFloatTensor_data(inputImages);
  gradOutput_data = THFloatTensor_data(gradOutput);
  grids_data = THFloatTensor_data(grids);
  gradGrids_data = THFloatTensor_data(gradGrids);
  gradInputImages_data = THFloatTensor_data(gradInputImages);

  int b, yOut, xOut;

  for(b=0; b < batchsize; b++)
  {
    for(yOut=0; yOut < gradOutput_height; yOut++)
    {
      for(xOut=0; xOut < gradOutput_width; xOut++)
      {
        //read the grid: channel 1 (+strideChannel) is x, channel 0 is y
        real xf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth + grids_strideChannel];
        real yf = grids_data[b*grids_strideBatch + yOut*grids_strideHeight + xOut*grids_strideWidth];
        
        // get the weights for interpolation
        int yInTopLeft, xInTopLeft;
        real yWeightTopLeft, xWeightTopLeft;

        // Map normalized [-1, 1] coordinates to pixel space; the top-left
        // corner of the 2x2 interpolation cell is floor(coord), and the
        // weight is the distance to the opposite corner.
        real xcoord = (xf + 1) * (inputImages_width - 1) / 2;
        xInTopLeft = floor(xcoord);
        xWeightTopLeft = 1 - (xcoord - xInTopLeft);

        real ycoord = (yf + 1) * (inputImages_height - 1) / 2;
        yInTopLeft = floor(ycoord);
        yWeightTopLeft = 1 - (ycoord - yInTopLeft);


        // Flat base addresses of the four corners (channel offset added in
        // the loop below via t * strideChannel).
        const int inTopLeftAddress = inputImages_strideBatch * b + inputImages_strideHeight * yInTopLeft + inputImages_strideWidth * xInTopLeft;
        const int inTopRightAddress = inTopLeftAddress + inputImages_strideWidth;
        const int inBottomLeftAddress = inTopLeftAddress + inputImages_strideHeight;
        const int inBottomRightAddress = inBottomLeftAddress + inputImages_strideWidth;

        const int gradInputImagesTopLeftAddress = gradInputImages_strideBatch * b + gradInputImages_strideHeight * yInTopLeft + gradInputImages_strideWidth * xInTopLeft;
        const int gradInputImagesTopRightAddress = gradInputImagesTopLeftAddress + gradInputImages_strideWidth;
        const int gradInputImagesBottomLeftAddress = gradInputImagesTopLeftAddress + gradInputImages_strideHeight;
        const int gradInputImagesBottomRightAddress = gradInputImagesBottomLeftAddress + gradInputImages_strideWidth;

        const int gradOutputAddress = gradOutput_strideBatch * b + gradOutput_strideHeight * yOut + gradOutput_strideWidth * xOut;

        // Per-corner dot products of input value with gradOutput across
        // channels; these feed the grid gradient below.
        real topLeftDotProduct = 0;
        real topRightDotProduct = 0;
        real bottomLeftDotProduct = 0;
        real bottomRightDotProduct = 0;

        // we are careful with the boundaries: corners outside the image
        // contribute nothing to either gradient
        bool topLeftIsIn = xInTopLeft >= 0 && xInTopLeft <= inputImages_width-1 && yInTopLeft >= 0 && yInTopLeft <= inputImages_height-1;
        bool topRightIsIn = xInTopLeft+1 >= 0 && xInTopLeft+1 <= inputImages_width-1 && yInTopLeft >= 0 && yInTopLeft <= inputImages_height-1;
        bool bottomLeftIsIn = xInTopLeft >= 0 && xInTopLeft <= inputImages_width-1 && yInTopLeft+1 >= 0 && yInTopLeft+1 <= inputImages_height-1;
        bool bottomRightIsIn = xInTopLeft+1 >= 0 && xInTopLeft+1 <= inputImages_width-1 && yInTopLeft+1 >= 0 && yInTopLeft+1 <= inputImages_height-1;

        int t;

        // For each channel: scatter the bilinear weights times gradOutput
        // into gradInputImages, and accumulate the corner dot products.
        for(t=0; t<inputImages_channels; t++)
        {
           real gradOutValue = gradOutput_data[gradOutputAddress + t * gradOutput_strideChannel];
           if(topLeftIsIn)
           {
              real inTopLeft = inputImages_data[inTopLeftAddress + t * inputImages_strideChannel];
              topLeftDotProduct += inTopLeft * gradOutValue;
              if(!onlyGrid) gradInputImages_data[gradInputImagesTopLeftAddress + t * gradInputImages_strideChannel] += xWeightTopLeft * yWeightTopLeft * gradOutValue;
           }

           if(topRightIsIn)
           {
              real inTopRight = inputImages_data[inTopRightAddress + t * inputImages_strideChannel];
              topRightDotProduct += inTopRight * gradOutValue;
              if(!onlyGrid) gradInputImages_data[gradInputImagesTopRightAddress + t * gradInputImages_strideChannel] += (1 - xWeightTopLeft) * yWeightTopLeft * gradOutValue;
           }

           if(bottomLeftIsIn)
           {
              real inBottomLeft = inputImages_data[inBottomLeftAddress + t * inputImages_strideChannel];
              bottomLeftDotProduct += inBottomLeft * gradOutValue;
              if(!onlyGrid) gradInputImages_data[gradInputImagesBottomLeftAddress + t * gradInputImages_strideChannel] += xWeightTopLeft * (1 - yWeightTopLeft) * gradOutValue;
           }

           if(bottomRightIsIn)
           {
              real inBottomRight = inputImages_data[inBottomRightAddress + t * inputImages_strideChannel];
              bottomRightDotProduct += inBottomRight * gradOutValue;
              if(!onlyGrid) gradInputImages_data[gradInputImagesBottomRightAddress + t * gradInputImages_strideChannel] += (1 - xWeightTopLeft) * (1 - yWeightTopLeft) * gradOutValue;
           }
        }

        // Gradient of the output w.r.t. the pixel-space coordinates: the
        // derivative of each bilinear weight w.r.t. xcoord/ycoord is +-1,
        // applied to the corner dot products. xf/yf are reused as scratch.
        xf = - yWeightTopLeft * topLeftDotProduct + yWeightTopLeft * topRightDotProduct - (1-yWeightTopLeft) * bottomLeftDotProduct + (1-yWeightTopLeft) * bottomRightDotProduct;
          
        yf = - xWeightTopLeft * topLeftDotProduct + xWeightTopLeft * bottomLeftDotProduct - (1-xWeightTopLeft) * topRightDotProduct + (1-xWeightTopLeft) * bottomRightDotProduct;
        

        // Chain rule through the [-1,1] -> pixel mapping: d(coord)/d(grid)
        // = (size-1)/2. Written to the same channel layout as the read above.
        gradGrids_data[b*gradGrids_strideBatch + yOut*gradGrids_strideHeight + xOut*gradGrids_strideWidth + gradGrids_strideChannel] = xf * (inputImages_width-1) / 2;
          
        gradGrids_data[b*gradGrids_strideBatch + yOut*gradGrids_strideHeight + xOut*gradGrids_strideWidth] = yf * (inputImages_height-1) / 2;
        

      }
    }
  }

  return 1;
}
コード例 #21
0
ファイル: fastimage.c プロジェクト: djaegle/torch-toolbox
// Lua binding: load the next batch of images (from the path configured by
// luafunc_init) into a 4D float tensor of shape (max, 3, height, width).
// Arguments from the Lua stack:
//   1: optional pre-allocated torch.FloatTensor to reuse (must match shape)
//   2: size index (1-based) selecting which entry of the global `sizes` to use
// Returns 2 Lua values: the tensor and a table of {filename,width,height}
// per image; returns 0 values when no image could be loaded.
// Relies on file-scope globals set by luafunc_init: max, nsizes, sizes,
// images, and helpers loadnextimage/scale/rgb_tofloat defined elsewhere.
static int luafunc_load(lua_State *L)
{
	THFloatTensor *t = 0;
	const char *tname = luaT_typename(L, 1);
	int i, index = lua_tointeger(L, 2);

	if(max == 0)
		luaL_error(L, "fastimage.init: call init first");
	if(index > nsizes)
		luaL_error(L, "Invalid size index %d", index);
	// Convert 1-based Lua index to 0-based; clamp missing/invalid to 0.
	index--;
	if(index < 0)
		index = 0;
	if(tname && !strcmp(tname, "torch.FloatTensor"))
	{
		// Reuse the caller's tensor only if it is 4D RGB and matches the
		// configured batch size and image size; otherwise allocate fresh.
		t = luaT_toudata(L, 1, luaT_typenameid(L, "torch.FloatTensor"));
		if(t->nDimension == 4 && t->size[1] == 3)
		{
			if(nsizes == 1)
			{
				sizes[0].width = t->size[3];
				sizes[0].height = t->size[2];
				max = t->size[0];
			} else if(sizes[0].width != t->size[3] || sizes[0].height != t->size[2] ||
				max != t->size[0])
				t = 0;
		} else t = 0;
	}
	// index == 0 means the first (or only) size: advance to the next batch
	// of source images; for other indices the images loaded previously are
	// re-scaled to the requested size.
	if(!index)
	{
		for(i = 0; i < max; i++)
			if(images[i].bitmap)
			{
				free(images[i].bitmap);
				images[i].bitmap = 0;
			}
		for(i = 0; i < max; i++)
		{
			if(loadnextimage(images + i))
				break;
		}
		if(i == 0)
		{
			lprintf("Nothing found\n");
			return 0;
		}
		// Fewer images than max: shrink the batch (and the reused tensor).
		if(i < max)
		{
			max = i;
			if(t)
				t = THFloatTensor_newNarrow(t, 0, 0, i);
		}	
	}
	for(i = 0; i < max; i++)
	{
		// Size 0x0 means "use the native size of the first image".
		if(nsizes == 1 && (!sizes[0].width || !sizes[0].height))
		{
			lprintf("Set width = %d, height = %d\n", images[i].width, images[i].height);
			sizes[0].width = images[i].width;
			sizes[0].height = images[i].height;
		}
		if(!t)
			t = THFloatTensor_newWithSize4d(max, 3, sizes[index].height, sizes[index].width);
		uint8_t *rescaled = scale(images + i, sizes[index].width, sizes[index].height);
		rgb_tofloat(THFloatTensor_data(t) + i * t->stride[0], t->stride[1], t->stride[2], rescaled, sizes[index].width, sizes[index].height);
		// scale() may return the original bitmap when no resampling was
		// needed; only free it when it is a separate allocation.
		if(rescaled != images[i].bitmap)
			free(rescaled);
		if(nsizes == 1 && images[i].bitmap)
		{
			// It's not necessary to keep all the images in memory, if there is only one size
			free(images[i].bitmap);
			images[i].bitmap = 0;
		}
	}
	lprintf("%d x 3 x %d x %d tensor returned\n", i, sizes[index].height, sizes[index].width);
	luaT_pushudata(L, t, "torch.FloatTensor");
	// Second return value: array of per-image metadata tables.
	lua_createtable(L, max, 0);
	for(i = 0; i < max; i++)
	{
		lua_pushinteger(L, i+1);

		lua_createtable(L, 0, 3);
		lua_pushstring(L, "filename");
		lua_pushstring(L, images[i].filename);
		lua_settable(L, -3);
		lua_pushstring(L, "width");
		lua_pushinteger(L, images[i].width);
		lua_settable(L, -3);
		lua_pushstring(L, "height");
		lua_pushinteger(L, images[i].height);
		lua_settable(L, -3);

		lua_settable(L, -3);
	}
	return 2;
}
コード例 #22
0
ファイル: fastimage.c プロジェクト: djaegle/torch-toolbox
// Lua binding: (re)initialize the fast image loader.
// Arguments from the Lua stack:
//   1: path (file or directory) to load images from
//   2: max number of images per batch
//   3: either a (nsizes x 2) FloatTensor of {width,height} pairs, or a width
//   4: grey level in [0,1] (tensor form) or a height (scalar form)
//   5: grey level in [0,1] (scalar form)
// Resets the global loader state (max, sizes, images, dir) and returns 0
// Lua values; raises a Lua error on invalid arguments or I/O failure.
static int luafunc_init(lua_State *L)
{
	struct stat st;
	const char *path = lua_tostring(L, 1);
	// Read the requested batch size into a local: the old global `max` is
	// still the size of the previously allocated `images` array and must be
	// used to free it below. (Bug fix: assigning `max` before the cleanup
	// loop made it iterate with the wrong bound — out-of-bounds reads when
	// the new max was larger, leaked bitmaps when it was smaller.)
	int newmax = lua_tointeger(L, 2);

	if(!path)
		luaL_error(L, "fastimage.init: path has to be a string");
	if(newmax < 1)
		luaL_error(L, "fastimage.init: max has to be a positive number");
	// NOTE(review): unbounded copy — assumes initpath is large enough for
	// any caller-supplied path; consider a bounded snprintf. TODO confirm
	// initpath's size where it is declared.
	strcpy(initpath, path);
	const char *tname = luaT_typename(L, 3);
	if(images)
	{
		int i;

		// Free the previous batch using the OLD max (its allocation size).
		for(i = 0; i < max; i++)
			if(images[i].bitmap)
				free(images[i].bitmap);
		free(images);
		images = 0;
	}
	max = newmax;
	if(sizes)
	{
		free(sizes);
		sizes = 0;
	}
	nsizes = 0;
	if(tname && !strcmp(tname, "torch.FloatTensor"))
	{
		// Tensor form: each row of the (nsizes x 2) tensor is {width, height}.
		THFloatTensor *t = luaT_toudata(L, 3, luaT_typenameid(L, "torch.FloatTensor"));
		if(t->nDimension == 2 && t->size[1] == 2)
		{
			int i;
			nsizes = t->size[0];
			sizes = (imgsize_t *)malloc(nsizes * sizeof(imgsize_t));
			float *data = THFloatTensor_data(t);
			for(i = 0; i < nsizes; i++)
			{
				sizes[i].width = data[i * t->stride[0]];
				sizes[i].height = data[i * t->stride[0] + 1];
			}
			if(lua_isnumber(L, 4))
				greylevel = (int)(255 * lua_tonumber(L, 4));
			else greylevel = -1;
		} else t = 0;
	} else {
		// Scalar form: a single width/height pair in arguments 3 and 4.
		nsizes = 1;
		sizes = (imgsize_t *)malloc(sizeof(imgsize_t));
		sizes[0].width = lua_tointeger(L, 3);
		sizes[0].height = lua_tointeger(L, 4);
		if(lua_isnumber(L, 5))
			greylevel = (int)(255 * lua_tonumber(L, 5));
		else greylevel = -1;
	}
	images = (img_t *)calloc(max, sizeof(img_t));

	lprintf("fastimage.init(%s, %d, %d, %d, %d)\n", path, max, sizes[0].width, sizes[0].height, greylevel);
	terminate = 0;
	if(dir)
	{
		closedir(dir);
		dir = 0;
	}
	if(!stat(path, &st))
	{
		// A regular file is loaded directly; a directory is enumerated.
		if(S_ISREG(st.st_mode))
			return 0;
		else if(S_ISDIR(st.st_mode))
		{
			lprintf("opendir %s\n", path);
			dir = opendir(path);
			if(!dir)
				luaL_error(L, "fastimage.init: failed to open directory %s", path);
			return 0;
		} else luaL_error(L, "fastimage.init: %s is neither a file, nor a directory", path);
	} else luaL_error(L, "fastimage.init: Cannot stat %s", path);
	return 0;
}
コード例 #23
0
ファイル: SpatialConvolution.c プロジェクト: lijian8/thnets
/* Forward pass of SpatialConvolution (CPU, valid cross-correlation).
 * Accepts a 3D (planes x H x W) or 4D (batch x planes x H x W) input,
 * resizes module->output accordingly, fills it with the bias, then adds
 * the convolution via conv2Dmv (3D) or conv2Dmm (4D). Returns output. */
THFloatTensor *nn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int strideW = module->SpatialConvolution.dW;
	int strideH = module->SpatialConvolution.dH;

	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	/* Height/width dimension indices shift by one for batched input. */
	int batched = input->nDimension == 4;
	int dimh = batched ? 2 : 1;
	int dimw = dimh + 1;

	long nOutputPlane = weight->size[0];
	long kW           = weight->size[3];
	long kH           = weight->size[2];
	long inputWidth   = input->size[dimw];
	long inputHeight  = input->size[dimh];
	/* Valid convolution: no padding. */
	long outputWidth  = (inputWidth - kW) / strideW + 1;
	long outputHeight = (inputHeight - kH) / strideH + 1;

	if (!batched)
	{
		long plane;
		float *bias_data;
		float *out_data;

		THFloatTensor_resize3d(output, nOutputPlane, outputHeight, outputWidth);
		/* Seed every output plane with its bias value. */
		bias_data = THFloatTensor_data(bias);
		out_data = THFloatTensor_data(output);

#pragma omp parallel for private(plane)
		for (plane = 0; plane < bias->size[0]; plane++)
		{
			float *dst = out_data + plane*outputWidth*outputHeight;
			long j;
			for (j = 0; j < outputWidth*outputHeight; j++)
				dst[j] = bias_data[plane];
		}
		THFloatTensor_conv2Dmv(output, 1.0, 1.0, input, weight, strideH, strideW, "V","X");
	}
	else
	{
		float *bias_data;
		float *out_data;
		long b;

		THFloatTensor_resize4d(output, input->size[0], nOutputPlane, outputHeight, outputWidth);

		bias_data = THFloatTensor_data(bias);
		out_data = THFloatTensor_data(output);

		/* Seed every plane of every batch element with its bias value. */
#pragma omp parallel for private(b)
		for (b = 0; b < input->size[0]; b++)
		{
			long plane;
			for (plane = 0; plane < bias->size[0]; plane++)
			{
				float *dst = out_data + b*nOutputPlane*outputWidth*outputHeight + plane*outputWidth*outputHeight;
				long j;
				for (j = 0; j < outputWidth*outputHeight; j++)
					dst[j] = bias_data[plane];
			}
		}

		/* Accumulate the convolutions on top of the bias. */
		THFloatTensor_conv2Dmm(output, 1.0, 1.0, input, weight, strideH, strideW, "V","X");
	}
	return output;
}
コード例 #24
0
ファイル: thbasic.c プロジェクト: lijian8/thnets
/* Batched 2D valid cross-correlation: r_ = beta * r_ + alpha * xcorr(t_, k_).
 * t_:  4D input  (batch x nInputPlane x rows x cols)
 * k_:  4D kernel (nOutputPlane x nInputPlane x kRows x kCols)
 * srow/scol: row/column strides (must be >= 1)
 * vf/xc: must be "V" (valid) and "X" (cross-correlation) — the only
 *        supported mode in this implementation.
 * r_ is resized to (batch x nOutputPlane x outRows x outCols). */
void THFloatTensor_conv2Dmm(THFloatTensor *r_, float beta, float alpha, THFloatTensor *t_, THFloatTensor *k_, long srow, long scol, const char *vf, const char *xc)
{
	long nInputPlane, nInputRows, nInputCols;
	long nKernelRows, nKernelCols;
	long nOutputPlane, nOutputRows, nOutputCols;
	long kstride0, kstride1;
	THFloatTensor *input;
	THFloatTensor* kernel;
	long nbatch;
	long nelem;
	float *input_data;
	float *weight_data;
	float *output_data;
	long p;

	/* Fixed: the message said "3D Tensor expected" while the check
	 * requires a 4D (batched) input. */
	if(t_->nDimension != 4)
		THError("input: 4D Tensor expected");
	if(k_->nDimension != 4)
		THError("kernel: 4D Tensor expected");
	if(srow < 1)
		THError("Stride should be a positive integer");
	if(scol < 1)
		THError("Stride should be a positive integer");
	if(*vf != 'V' || *xc != 'X')
		THError("Type of convolution can be 'V','X' only");

	input = t_;
	kernel = k_;

	nbatch = input->size[0];
	nInputPlane = input->size[1];
	nInputRows  = input->size[2];
	nInputCols  = input->size[3];

	kstride0    = kernel->stride[0];
	kstride1    = kernel->stride[1];
	nKernelRows = kernel->size[2];
	nKernelCols = kernel->size[3];
	nOutputPlane = kernel->size[0];
	if(kernel->size[1] != nInputPlane)
		THError("invalid number of input planes");

	if(!(nInputRows >= nKernelRows && nInputCols >= nKernelCols))
		/* Fixed: the message referenced conv2Dmv inside conv2Dmm. */
		THError("conv2Dmm : Input image is smaller than kernel");

	nOutputRows = (nInputRows - nKernelRows) / srow + 1;
	nOutputCols = (nInputCols - nKernelCols) / scol + 1;

	/* Element count is captured before the resize so we can detect whether
	 * the resize changed the output shape (old contents then meaningless). */
	nelem = THFloatTensor_nElement(r_);
	THFloatTensor_resize4d(r_, nbatch, nOutputPlane, nOutputRows, nOutputCols);

	input_data = THFloatTensor_data(input);
	weight_data = THFloatTensor_data(kernel);
	output_data = THFloatTensor_data(r_);

	if (nelem == 0 || beta == 0 || nelem != THFloatTensor_nElement(r_))
	{
		/* r_ was empty, reshaped, or beta == 0: start from zero. */
#pragma omp parallel for private(p)
		for (p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] = 0.0;
			}
		}
	}
	else if (beta != 1)
	{
		/* Scale the existing output by beta before accumulating. */
#pragma omp parallel for private(p)
		for(p=0; p < r_->size[0]; p++)
		{
			long k;
			for (k = 0; k < r_->size[1]; k++)
			{
				float* ptr_output = output_data + p*nOutputPlane*nOutputRows*nOutputCols + k*nOutputCols*nOutputRows;
				long l;
				for (l = 0; l < nOutputRows*nOutputCols; l++)
					ptr_output[l] *= beta;
			}
		}
	}

	/* Accumulate alpha-scaled cross-correlations: each output plane sums
	 * the contributions of all input planes. Parallel over the batch. */
#pragma omp parallel for private(p)
	for(p=0; p < nbatch; p++)
	{
		long k;
		for(k = 0; k < nOutputPlane; k++)
		{
			long i;
			/* get output */
			float *ptr_output = output_data + p*nOutputPlane*nOutputCols*nOutputRows + k*nOutputCols*nOutputRows;
			for(i = 0; i < nInputPlane; i++)
			{
				/* get kernel */
				float *ptr_weight = weight_data + k*kstride0 + i*kstride1;
				/* get input */
				float *ptr_input = input_data + p*nInputPlane*nInputRows*nInputCols + i*nInputRows*nInputCols;

				/* do image, kernel convolution */
				THFloatTensor_validXCorr2Dptr(ptr_output,
					alpha,
					ptr_input,  nInputRows,  nInputCols,
					ptr_weight, nKernelRows, nKernelCols,
					srow, scol);
			}
		}
	}
}
コード例 #25
0
/* Forward pass of SpatialConvolution via cuDNN.
 * Builds tensor/filter/convolution descriptors from the module parameters,
 * resizes module->output to the cuDNN-computed shape, runs the forward
 * convolution with a cached (static, grow-only) workspace, then adds the
 * bias. 1D/2D inputs are treated as (N x C x 1 x 1) so the module can act
 * as a linear layer. Returns module->output. */
THFloatTensor *cudnn_SpatialConvolution_updateOutput(struct module *module, THFloatTensor *input)
{
	int kW = module->SpatialConvolution.kW;
	int kH = module->SpatialConvolution.kH;
	int dW = module->SpatialConvolution.dW;
	int dH = module->SpatialConvolution.dH;
	int padW = module->SpatialConvolution.padW;
	int padH = module->SpatialConvolution.padH;
	int nInputPlane  = module->SpatialConvolution.nInputPlane;
	int nOutputPlane = module->SpatialConvolution.nOutputPlane;

	THFloatTensor *weight = module->SpatialConvolution.weight;
	THFloatTensor *bias = module->SpatialConvolution.bias;
	THFloatTensor *output = module->output;

	int sizes[4];
	int pad[2], filterStride[2], upscale[2];
	cudnnTensorDescriptor_t dinput, dbias, doutput;
	cudnnConvolutionDescriptor_t dconv;
	cudnnFilterDescriptor_t dweight;
	float one = 1, zero = 0;
	size_t reqwssize;
	/* Workspace is cached across calls and only grows. */
	static void *ws;
	static size_t wssize;
	static const int alg = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;

	pad[0] = padH;
	pad[1] = padW;
	filterStride[0] = dH;
	filterStride[1] = dW;
	upscale[0] = 1;
	upscale[1] = 1;

	if(input->nDimension <= 2)
	{
		// Here we use the SpatialConvolution module to perform a linear transformation
		errcheck(cudnnCreateTensorDescriptor(&dinput));
		if(input->nDimension == 1)
			errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, 1, input->size[0], 1, 1));
		else errcheck(cudnnSetTensor4dDescriptor(dinput, CUDNN_TENSOR_NCHW, floattype, input->size[0], input->size[1], 1, 1));
	} else errcheck(THcudnn_TensorDescriptor(&dinput, input));
	errcheck(cudnnCreateFilterDescriptor(&dweight));
	errcheck(cudnnSetFilter4dDescriptor(dweight, floattype, nOutputPlane, nInputPlane, kH, kW));
	errcheck(cudnnCreateTensorDescriptor(&dbias));
	errcheck(cudnnSetTensor4dDescriptor(dbias, CUDNN_TENSOR_NCHW, floattype, 1, bias->size[0], 1, 1));
	errcheck(cudnnCreateConvolutionDescriptor(&dconv));
	errcheck(cudnnSetConvolutionNdDescriptor(dconv, 2, pad, filterStride, upscale, CUDNN_CROSS_CORRELATION, floattype));
	errcheck(cudnnGetConvolutionNdForwardOutputDim(dconv, dinput, dweight, 4, sizes));
	THCudaTensor_resize4d(output, sizes[0], sizes[1], sizes[2], sizes[3]);
	errcheck(THcudnn_TensorDescriptor(&doutput, output));
	if(alg == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM || alg == CUDNN_CONVOLUTION_FWD_ALGO_GEMM ||
		alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT || alg == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING)
	{
		errcheck(cudnnGetConvolutionForwardWorkspaceSize(THcudnn_getHandle(), dinput, dweight, dconv, doutput, alg, &reqwssize));
		if(reqwssize > wssize)
		{
			/* Fixed: release the previous workspace before growing it —
			 * the old buffer was leaked — and record the new size only
			 * after a successful allocation. */
			if(ws)
			{
				cudaFree(ws);
				ws = 0;
				wssize = 0;
			}
			errcheck(cudaMalloc(&ws, reqwssize));
			wssize = reqwssize;
		}
	}
	errcheck(cudnnConvolutionForward(THcudnn_getHandle(), &one, dinput, THFloatTensor_data(input),
		dweight, THFloatTensor_data(weight), dconv, alg, ws, wssize, &zero,
		doutput, THFloatTensor_data(output)));
	/* Broadcast-add the bias over the batch and spatial dimensions. */
	errcheck(cudnnAddTensor_v3(THcudnn_getHandle(), &one, dbias, THFloatTensor_data(bias),
		&one, doutput, THFloatTensor_data(output)));
	cudnnDestroyTensorDescriptor(dinput);
	cudnnDestroyFilterDescriptor(dweight);
	cudnnDestroyTensorDescriptor(dbias);
	cudnnDestroyTensorDescriptor(doutput);
	cudnnDestroyConvolutionDescriptor(dconv);
	return output;
}