Esempio n. 1
0
/// Converts a Vector4f into a float4 by copying its x, y, z and w components in order.
__host__
float4 make_float4( const Vector4f& v )
{
    const float4 converted = make_float4( v.x, v.y, v.z, v.w );
    return converted;
}
Esempio n. 2
0
/// Explicit specialization of _pixMakeZero for float4: returns a pixel whose four channels are all 0.
/// No storage-class specifier is written here: standard C++ does not allow one on an explicit
/// specialization, and the specialization takes its linkage from the primary template.
template<> inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
Esempio n. 3
0
/* Copy the camera state into the kernel camera data stored in dscene.
 *
 * Calls update() first. Returns early when no device update is pending and the
 * scene's motion type is the same as on the previous call. Otherwise fills
 * dscene->data.cam and keeps the shutter curve lookup table registered with
 * scene->lookup_tables in sync with the motion blur setting.
 *
 * device: only device->info.advanced_shading is read here.
 * dscene: receives the kernel camera data (and the shutter table, if any).
 * scene:  provides the motion type and the lookup table manager. */
void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene)
{
	Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading);

	update();

	if(previous_need_motion != need_motion) {
		/* scene's motion model could have been changed since previous device
		 * camera update this could happen for example in case when one render
		 * layer has got motion pass and another not */
		need_device_update = true;
	}

	if(!need_device_update)
		return;
	
	KernelCamera *kcam = &dscene->data.cam;

	/* store matrices */
	kcam->screentoworld = screentoworld;
	kcam->rastertoworld = rastertoworld;
	kcam->rastertocamera = rastertocamera;
	kcam->cameratoworld = cameratoworld;
	kcam->worldtocamera = worldtocamera;
	kcam->worldtoscreen = worldtoscreen;
	kcam->worldtoraster = worldtoraster;
	kcam->worldtondc = worldtondc;

	/* camera motion */
	kcam->have_motion = 0;
	kcam->have_perspective_motion = 0;

	if(need_motion == Scene::MOTION_PASS) {
		/* TODO(sergey): Support perspective (zoom, fov) motion. */
		if(type == CAMERA_PANORAMA) {
			if(use_motion) {
				kcam->motion.pre = transform_inverse(motion.pre);
				kcam->motion.post = transform_inverse(motion.post);
			}
			else {
				/* no camera motion: both samples use the current transform */
				kcam->motion.pre = kcam->worldtocamera;
				kcam->motion.post = kcam->worldtocamera;
			}
		}
		else {
			if(use_motion) {
				kcam->motion.pre = cameratoraster * transform_inverse(motion.pre);
				kcam->motion.post = cameratoraster * transform_inverse(motion.post);
			}
			else {
				kcam->motion.pre = worldtoraster;
				kcam->motion.post = worldtoraster;
			}
		}
	}
#ifdef __CAMERA_MOTION__
	else if(need_motion == Scene::MOTION_BLUR) {
		if(use_motion) {
			transform_motion_decompose((DecompMotionTransform*)&kcam->motion, &motion, &matrix);
			kcam->have_motion = 1;
		}
		if(use_perspective_motion) {
			kcam->perspective_motion = perspective_motion;
			kcam->have_perspective_motion = 1;
		}
	}
#endif

	/* depth of field */
	kcam->aperturesize = aperturesize;
	kcam->focaldistance = focaldistance;
	/* fewer than 3 blades is stored as 0 */
	kcam->blades = (blades < 3)? 0.0f: blades;
	kcam->bladesrotation = bladesrotation;

	/* motion blur */
#ifdef __CAMERA_MOTION__
	kcam->shuttertime = (need_motion == Scene::MOTION_BLUR) ? shuttertime: -1.0f;

	/* Release the table registered by a previous update before (possibly)
	 * adding a new one. Without this, every update with motion blur enabled
	 * would add another table and lose track of the old one. */
	if(shutter_table_offset != TABLE_OFFSET_INVALID) {
		scene->lookup_tables->remove_table(shutter_table_offset);
		shutter_table_offset = TABLE_OFFSET_INVALID;
	}

	if(need_motion == Scene::MOTION_BLUR) {
		vector<float> shutter_table;
		util_cdf_inverted(SHUTTER_TABLE_SIZE,
		                  0.0f,
		                  1.0f,
		                  function_bind(shutter_curve_eval, _1, shutter_curve),
		                  false,
		                  shutter_table);
		shutter_table_offset = scene->lookup_tables->add_table(dscene,
		                                                       shutter_table);
		kcam->shutter_table_offset = (int)shutter_table_offset;
	}
#else
	kcam->shuttertime = -1.0f;
#endif

	/* type */
	kcam->type = type;

	/* anamorphic lens bokeh */
	kcam->inv_aperture_ratio = 1.0f / aperture_ratio;

	/* panorama */
	kcam->panorama_type = panorama_type;
	kcam->fisheye_fov = fisheye_fov;
	kcam->fisheye_lens = fisheye_lens;
	kcam->equirectangular_range = make_float4(longitude_min - longitude_max, -longitude_min,
	                                          latitude_min -  latitude_max, -latitude_min + M_PI_2_F);

	/* sensor size */
	kcam->sensorwidth = sensorwidth;
	kcam->sensorheight = sensorheight;

	/* render size */
	kcam->width = width;
	kcam->height = height;
	kcam->resolution = resolution;

	/* store differentials */
	kcam->dx = float3_to_float4(dx);
	kcam->dy = float3_to_float4(dy);

	/* clipping */
	kcam->nearclip = nearclip;
	/* an unbounded far clip (FLT_MAX) is passed through unchanged */
	kcam->cliplength = (farclip == FLT_MAX)? FLT_MAX: farclip - nearclip;

	/* Camera in volume. */
	kcam->is_inside_volume = 0;

	/* Rolling shutter effect */
	kcam->rolling_shutter_type = rolling_shutter_type;
	kcam->rolling_shutter_duration = rolling_shutter_duration;

	/* remembered so the next call can detect a change of motion type */
	previous_need_motion = need_motion;
}
Esempio n. 4
0
// Unit test driver for the iu GPU image classes.
//
// Three stages, each followed by a per-pixel comparison on the CPU:
//   1. setValue on CPU images, copy CPU -> GPU -> CPU.
//   2. setValue on GPU images, copy GPU -> CPU.
//   3. copy GPU -> GPU into fresh images, then copy those GPU -> CPU.
// Returns EXIT_FAILURE at the first mismatching pixel, EXIT_SUCCESS otherwise.
int main(int argc, char** argv)
{
  std::cout << "Starting iu_image_gpu_unittest ..." << std::endl;

  // test image size
  IuSize sz(79,63);

  // device images under test, one per pixel format
  iu::ImageGpu_8u_C1 im_gpu_8u_C1(sz);
  iu::ImageGpu_8u_C2 im_gpu_8u_C2(sz);
  iu::ImageGpu_8u_C3 im_gpu_8u_C3(sz);
  iu::ImageGpu_8u_C4 im_gpu_8u_C4(sz);
  iu::ImageGpu_32f_C1 im_gpu_32f_C1(sz);
  iu::ImageGpu_32f_C2 im_gpu_32f_C2(sz);
  iu::ImageGpu_32f_C3 im_gpu_32f_C3(sz);
  iu::ImageGpu_32f_C4 im_gpu_32f_C4(sz);

  // reference values written to every pixel
  unsigned char set_value_8u_C1 = 1;
  uchar2 set_value_8u_C2 = make_uchar2(2,2);
  uchar3 set_value_8u_C3 = make_uchar3(3,3,3);
  uchar4 set_value_8u_C4 = make_uchar4(4,4,4,4);
  float set_value_32f_C1 = 1.1f;
  float2 set_value_32f_C2 = make_float2(2.2f);
  float3 set_value_32f_C3 = make_float3(3.3f);
  float4 set_value_32f_C4 = make_float4(4.4f);

  // host images used to read results back for comparison
  iu::ImageCpu_8u_C1 im_cpu_8u_C1(sz);
  iu::ImageCpu_8u_C2 im_cpu_8u_C2(sz);
  iu::ImageCpu_8u_C3 im_cpu_8u_C3(sz);
  iu::ImageCpu_8u_C4 im_cpu_8u_C4(sz);
  iu::ImageCpu_32f_C1 im_cpu_32f_C1(sz);
  iu::ImageCpu_32f_C2 im_cpu_32f_C2(sz);
  iu::ImageCpu_32f_C3 im_cpu_32f_C3(sz);
  iu::ImageCpu_32f_C4 im_cpu_32f_C4(sz);


  // stage 1: set values on cpu, round-trip through the gpu
  {
    std::cout << "Testing copy. setValue on cpu (should work because of previous test) and copy forth and back" << std::endl;

    iu::setValue(set_value_8u_C1, &im_cpu_8u_C1, im_cpu_8u_C1.roi());
    iu::setValue(set_value_8u_C2, &im_cpu_8u_C2, im_cpu_8u_C2.roi());
    iu::setValue(set_value_8u_C3, &im_cpu_8u_C3, im_cpu_8u_C3.roi());
    iu::setValue(set_value_8u_C4, &im_cpu_8u_C4, im_cpu_8u_C4.roi());
    iu::setValue(set_value_32f_C1, &im_cpu_32f_C1, im_cpu_32f_C1.roi());
    iu::setValue(set_value_32f_C2, &im_cpu_32f_C2, im_cpu_32f_C2.roi());
    iu::setValue(set_value_32f_C3, &im_cpu_32f_C3, im_cpu_32f_C3.roi());
    iu::setValue(set_value_32f_C4, &im_cpu_32f_C4, im_cpu_32f_C4.roi());

    std::cout << "  copy cpu -> gpu ..." << std::endl;
    iu::copy(&im_cpu_8u_C1, &im_gpu_8u_C1);
    iu::copy(&im_cpu_8u_C2, &im_gpu_8u_C2);
    iu::copy(&im_cpu_8u_C3, &im_gpu_8u_C3);
    iu::copy(&im_cpu_8u_C4, &im_gpu_8u_C4);
    iu::copy(&im_cpu_32f_C1, &im_gpu_32f_C1);
    iu::copy(&im_cpu_32f_C2, &im_gpu_32f_C2);
    iu::copy(&im_cpu_32f_C3, &im_gpu_32f_C3);
    iu::copy(&im_cpu_32f_C4, &im_gpu_32f_C4);
    std::cout << "  copy gpu -> cpu ..." << std::endl;
    iu::copy(&im_gpu_8u_C1, &im_cpu_8u_C1);
    iu::copy(&im_gpu_8u_C2, &im_cpu_8u_C2);
    iu::copy(&im_gpu_8u_C3, &im_cpu_8u_C3);
    iu::copy(&im_gpu_8u_C4, &im_cpu_8u_C4);
    iu::copy(&im_gpu_32f_C1, &im_cpu_32f_C1);
    iu::copy(&im_gpu_32f_C2, &im_cpu_32f_C2);
    iu::copy(&im_gpu_32f_C3, &im_cpu_32f_C3);
    iu::copy(&im_gpu_32f_C4, &im_cpu_32f_C4);

    std::cout << "  check copied values on cpu ..." << std::endl;
    // every pixel must still hold its reference value
    for (unsigned int row = 0; row < sz.height; ++row)
    {
      for (unsigned int col = 0; col < sz.width; ++col)
      {
        // 8-bit formats (evaluated in C1..C4 order, stopping at the first mismatch)
        if( *im_cpu_8u_C1.data(col,row) != set_value_8u_C1 ||
            *im_cpu_8u_C2.data(col,row) != set_value_8u_C2 ||
            *im_cpu_8u_C3.data(col,row) != set_value_8u_C3 ||
            *im_cpu_8u_C4.data(col,row) != set_value_8u_C4 )
        {
          return EXIT_FAILURE;
        }

        // 32-bit float formats
        if( *im_cpu_32f_C1.data(col,row) != set_value_32f_C1 ||
            *im_cpu_32f_C2.data(col,row) != set_value_32f_C2 ||
            *im_cpu_32f_C3.data(col,row) != set_value_32f_C3 ||
            *im_cpu_32f_C4.data(col,row) != set_value_32f_C4 )
        {
          return EXIT_FAILURE;
        }
      }
    }
  }

  // stage 2: set values directly on the gpu images
  {
    std::cout << "Testing setValue on gpu (implecitely testing copy gpu->cpu) ..." << std::endl;

    iu::setValue(set_value_8u_C1, &im_gpu_8u_C1, im_gpu_8u_C1.roi());
    iu::setValue(set_value_8u_C2, &im_gpu_8u_C2, im_gpu_8u_C2.roi());
    iu::setValue(set_value_8u_C3, &im_gpu_8u_C3, im_gpu_8u_C3.roi());
    iu::setValue(set_value_8u_C4, &im_gpu_8u_C4, im_gpu_8u_C4.roi());
    iu::setValue(set_value_32f_C1, &im_gpu_32f_C1, im_gpu_32f_C1.roi());
    iu::setValue(set_value_32f_C2, &im_gpu_32f_C2, im_gpu_32f_C2.roi());
    iu::setValue(set_value_32f_C3, &im_gpu_32f_C3, im_gpu_32f_C3.roi());
    iu::setValue(set_value_32f_C4, &im_gpu_32f_C4, im_gpu_32f_C4.roi());

    std::cout << "Copy gpu images to cpu for checking the set values." << std::endl;
    iu::copy(&im_gpu_8u_C1, &im_cpu_8u_C1);
    iu::copy(&im_gpu_8u_C2, &im_cpu_8u_C2);
    iu::copy(&im_gpu_8u_C3, &im_cpu_8u_C3);
    iu::copy(&im_gpu_8u_C4, &im_cpu_8u_C4);
    iu::copy(&im_gpu_32f_C1, &im_cpu_32f_C1);
    iu::copy(&im_gpu_32f_C2, &im_cpu_32f_C2);
    iu::copy(&im_gpu_32f_C3, &im_cpu_32f_C3);
    iu::copy(&im_gpu_32f_C4, &im_cpu_32f_C4);

    // every pixel must hold its reference value
    for (unsigned int row = 0; row < sz.height; ++row)
    {
      for (unsigned int col = 0; col < sz.width; ++col)
      {
        // 8-bit formats
        if( *im_cpu_8u_C1.data(col,row) != set_value_8u_C1 ||
            *im_cpu_8u_C2.data(col,row) != set_value_8u_C2 ||
            *im_cpu_8u_C3.data(col,row) != set_value_8u_C3 ||
            *im_cpu_8u_C4.data(col,row) != set_value_8u_C4 )
        {
          return EXIT_FAILURE;
        }

        // 32-bit float formats
        if( *im_cpu_32f_C1.data(col,row) != set_value_32f_C1 ||
            *im_cpu_32f_C2.data(col,row) != set_value_32f_C2 ||
            *im_cpu_32f_C3.data(col,row) != set_value_32f_C3 ||
            *im_cpu_32f_C4.data(col,row) != set_value_32f_C4 )
        {
          return EXIT_FAILURE;
        }
      }
    }
  }

  // stage 3: gpu -> gpu copy into freshly constructed images
  {
    std::cout << "testing copy gpu -> gpu  ..." << std::endl;

    iu::ImageGpu_8u_C1 cp_gpu_8u_C1(sz);
    iu::ImageGpu_8u_C2 cp_gpu_8u_C2(sz);
    iu::ImageGpu_8u_C3 cp_gpu_8u_C3(sz);
    iu::ImageGpu_8u_C4 cp_gpu_8u_C4(sz);
    iu::ImageGpu_32f_C1 cp_gpu_32f_C1(sz);
    iu::ImageGpu_32f_C2 cp_gpu_32f_C2(sz);
    iu::ImageGpu_32f_C3 cp_gpu_32f_C3(sz);
    iu::ImageGpu_32f_C4 cp_gpu_32f_C4(sz);

    iu::copy(&im_gpu_8u_C1, &cp_gpu_8u_C1);
    iu::copy(&im_gpu_8u_C2, &cp_gpu_8u_C2);
    iu::copy(&im_gpu_8u_C3, &cp_gpu_8u_C3);
    iu::copy(&im_gpu_8u_C4, &cp_gpu_8u_C4);
    iu::copy(&im_gpu_32f_C1, &cp_gpu_32f_C1);
    iu::copy(&im_gpu_32f_C2, &cp_gpu_32f_C2);
    iu::copy(&im_gpu_32f_C3, &cp_gpu_32f_C3);
    iu::copy(&im_gpu_32f_C4, &cp_gpu_32f_C4);

    // read the copies back to the host images
    iu::copy(&cp_gpu_8u_C1, &im_cpu_8u_C1);
    iu::copy(&cp_gpu_8u_C2, &im_cpu_8u_C2);
    iu::copy(&cp_gpu_8u_C3, &im_cpu_8u_C3);
    iu::copy(&cp_gpu_8u_C4, &im_cpu_8u_C4);
    iu::copy(&cp_gpu_32f_C1, &im_cpu_32f_C1);
    iu::copy(&cp_gpu_32f_C2, &im_cpu_32f_C2);
    iu::copy(&cp_gpu_32f_C3, &im_cpu_32f_C3);
    iu::copy(&cp_gpu_32f_C4, &im_cpu_32f_C4);

    // every pixel must hold its reference value
    for (unsigned int row = 0; row < sz.height; ++row)
    {
      for (unsigned int col = 0; col < sz.width; ++col)
      {
        // 8-bit formats
        if( *im_cpu_8u_C1.data(col,row) != set_value_8u_C1 ||
            *im_cpu_8u_C2.data(col,row) != set_value_8u_C2 ||
            *im_cpu_8u_C3.data(col,row) != set_value_8u_C3 ||
            *im_cpu_8u_C4.data(col,row) != set_value_8u_C4 )
        {
          return EXIT_FAILURE;
        }

        // 32-bit float formats
        if( *im_cpu_32f_C1.data(col,row) != set_value_32f_C1 ||
            *im_cpu_32f_C2.data(col,row) != set_value_32f_C2 ||
            *im_cpu_32f_C3.data(col,row) != set_value_32f_C3 ||
            *im_cpu_32f_C4.data(col,row) != set_value_32f_C4 )
        {
          return EXIT_FAILURE;
        }
      }
    }
  }


  std::cout << std::endl;
  std::cout << "**************************************************************************" << std::endl;
  std::cout << "*  Everything seem to be ok. -- All assertions passed.                   *" << std::endl;
  std::cout << "*  Look at the images and close the windows to derminate the unittests.  *" << std::endl;
  std::cout << "**************************************************************************" << std::endl;
  std::cout << std::endl;

  return EXIT_SUCCESS;
}
Esempio n. 5
0
/// Packs the first four components of an OpenCV scalar into a float4,
/// converting each component to float.
static float4 scalarToCudaType(const cv::Scalar& in)
{
  const float c0 = static_cast<float>(in[0]);
  const float c1 = static_cast<float>(in[1]);
  const float c2 = static_cast<float>(in[2]);
  const float c3 = static_cast<float>(in[3]);
  return make_float4(c0, c1, c2, c3);
}
Esempio n. 6
0
/* Try to lower the surface area of the subtree rooted at `node` with one tree
 * rotation per inner node.
 *
 * Children are processed first (recursion limited by max_depth). Then, for
 * each non-leaf child, the cost of swapping one of its children with the
 * parent's other child is computed, and the cheapest swap is applied if it
 * reduces the half-area of that child's bounds. */
void BVHBuild::rotate(BVHNode *node, int max_depth)
{
    /* nothing to rotate if we reached a leaf node. */
    if(node->is_leaf() || max_depth < 0)
        return;

    InnerNode *parent = (InnerNode*)node;

    /* rotate all children first */
    for(size_t c = 0; c < 2; c++)
        rotate(parent->children[c], max_depth-1);

    /* compute current area of all children */
    BoundBox bounds0 = parent->children[0]->m_bounds;
    BoundBox bounds1 = parent->children[1]->m_bounds;

    float area0 = bounds0.half_area();
    float area1 = bounds1.half_area();
    float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);

    /* find best rotation. we pick a target child of a first child, and swap
     * this with an other child. we perform the best such swap. */
    float best_cost = FLT_MAX;
    int best_child = -1, best_target = -1, best_other = -1;

    for(size_t c = 0; c < 2; c++) {
        /* ignore leaf nodes as we cannot descent into */
        if(parent->children[c]->is_leaf())
            continue;

        InnerNode *child = (InnerNode*)parent->children[c];
        BoundBox& other = (c == 0)? bounds1: bounds0;

        /* bounds of the two grandchildren that are swap candidates */
        BoundBox target0 = child->children[0]->m_bounds;
        BoundBox target1 = child->children[1]->m_bounds;

        /* compute cost for both possible swaps: costN is the change in the
         * child's half-area when its grandchild N is exchanged with `other` */
        float cost0 = merge(other, target1).half_area() - child_area[c];
        float cost1 = merge(target0, other).half_area() - child_area[c];

        if(min(cost0,cost1) < best_cost) {
            best_child = (int)c;
            best_other = (int)(1-c);

            if(cost0 < cost1) {
                best_cost = cost0;
                best_target = 0;
            }
            else {
                /* record the cost of the swap actually selected (cost1);
                 * storing cost0 here would make the test below reject
                 * rotations whose real cost is negative. */
                best_cost = cost1;
                best_target = 1;
            }
        }
    }

    /* if we did not find a swap that improves the SAH then do nothing */
    if(best_cost >= 0)
        return;

    assert(best_child == 0 || best_child == 1);
    assert(best_target != -1);

    /* perform the best found tree rotation */
    InnerNode *child = (InnerNode*)parent->children[best_child];

    swap(parent->children[best_other], child->children[best_target]);
    /* only the rotated child's bounds change; recompute them from its children */
    child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
}
void
Projector::drawFrustum()
{
    static const float4 akPoints[24] = 
    { 
        make_float4(-1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(-1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, -1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, -1.0f, 1.0f),

        make_float4(-1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(-1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, +1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, +1.0f, 1.0f),

        make_float4(-1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(-1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, -1.0f, +1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, -1.0f, 1.0f),
        make_float4(-1.0f, +1.0f, +1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, -1.0f, 1.0f),
        make_float4(+1.0f, +1.0f, +1.0f, 1.0f)
    };

    float16 kInvViewPrj = inverse((m_kModelViewMatrix * m_kProjectionMatrix));

    glPushAttrib(GL_LIGHTING_BIT);
    glPushAttrib(GL_CURRENT_BIT);
    glDisable(GL_DEPTH_TEST);

    glPushMatrix();
    {
        glPointSize(10.0f);
        glColor3f(1.0f, 1.0f, 0.0f);
        glBegin(GL_POINTS);
        for(uint i = 0; i < 4; i ++)
        {
            float4 kP = m_akCorners[i];
            kP /= kP.w;
            
            glVertex3f(kP.x, kP.y, kP.z);
        }
        glEnd();

        glBegin(GL_LINES);
        for(uint i = 0; i < 4; i ++)
        {
            uint u = i;
            uint v = (i >= 3) ? 0 : (i + 1);

            float4 kP0 = m_akCorners[u];
            float4 kP1 = m_akCorners[v];
            
            kP0 /= kP0.w;
            kP1 /= kP1.w;
            
            glVertex3f(kP0.x, kP0.y, kP0.z);
            glVertex3f(kP1.x, kP1.y, kP1.z);
        }
        glEnd();

        glColor3f(1.0f, 0.0f, 0.0f);
        glBegin(GL_LINES);
        for(uint i = 0; i < 24; i ++)
        {
            float4 kP = kInvViewPrj * akPoints[i];
            kP /= kP.w;
            
            glVertex3f(kP.x, kP.y, kP.z);
        }
        glEnd();
    }
    glPopMatrix();
    
    glEnable(GL_DEPTH_TEST);
    glPopAttrib();
    glPopAttrib();
}
// main entry point
//
// As read from the code below:
//   argv[1]  input image filename (required)
//   argv[2]  optional output filename; when given and classification succeeds,
//            a text overlay with the confidence and class description is
//            rendered and the image is saved to this file
// The full argc/argv are also handed to imageNet::Create().
// Every path returns 0, including the failure paths.
int main( int argc, char** argv )
{
	printf("imagenet-console\n  args (%i):  ", argc);
	
	for( int i=0; i < argc; i++ )
		printf("%i [%s]  ", i, argv[i]);
		
	printf("\n\n");
	
	
	// retrieve filename argument
	if( argc < 2 )
	{
		printf("imagenet-console:   input image filename required\n");
		return 0;
	}
	
	const char* imgFilename = argv[1];
	

	// create imageNet
	imageNet* net = imageNet::Create(argc, argv);

	if( !net )
	{
		printf("imagenet-console:   failed to initialize imageNet\n");
		return 0;
	}
	
	net->EnableProfiler();
	
	// load image from file on disk
	float* imgCPU    = NULL;
	float* imgCUDA   = NULL;
	int    imgWidth  = 0;
	int    imgHeight = 0;
		
	if( !loadImageRGBA(imgFilename, (float4**)&imgCPU, (float4**)&imgCUDA, &imgWidth, &imgHeight) )
	{
		printf("failed to load image '%s'\n", imgFilename);
		delete net;	// do not leak the network on this early exit
		return 0;
	}

	float confidence = 0.0f;
	
	// classify image
	const int img_class = net->Classify(imgCUDA, imgWidth, imgHeight, &confidence);
	
	if( img_class >= 0 )
	{
		printf("imagenet-console:  '%s' -> %2.5f%% class #%i (%s)\n", imgFilename, confidence * 100.0f, img_class, net->GetClassDesc(img_class));
	
		if( argc > 2 )
		{
			const char* outputFilename = argv[2];
			
			// overlay the classification on the image
			// NOTE(review): the font object is never released here; confirm how cudaFont instances are meant to be destroyed.
			cudaFont* font = cudaFont::Create();
			
			if( font != NULL )
			{
				// bounded formatting: the class description length is not known here
				char str[512];
				snprintf(str, sizeof(str), "%2.3f%% %s", confidence * 100.0f, net->GetClassDesc(img_class));

				const int overlay_x = 10;
				const int overlay_y = 10;

				// if the image has a white background, use black text (otherwise, white)
				const float white_cutoff = 225.0f;
				bool white_background = false;

				// only sample the pixel at the overlay position when it lies inside the image;
				// 4 floats per pixel, rows of imgWidth pixels
				if( overlay_x < imgWidth && overlay_y < imgHeight )
				{
					const int px_offset = overlay_y * imgWidth * 4 + overlay_x * 4;

					if( imgCPU[px_offset] > white_cutoff && imgCPU[px_offset + 1] > white_cutoff && imgCPU[px_offset + 2] > white_cutoff )
						white_background = true;
				}

				// overlay the text on the image
				font->RenderOverlay((float4*)imgCUDA, (float4*)imgCUDA, imgWidth, imgHeight, (const char*)str, overlay_x, overlay_y,
								white_background ? make_float4(0.0f, 0.0f, 0.0f, 255.0f) : make_float4(255.0f, 255.0f, 255.0f, 255.0f));
			}
			
			printf("imagenet-console:  attempting to save output image to '%s'\n", outputFilename);
			
			if( !saveImageRGBA(outputFilename, (float4*)imgCPU, imgWidth, imgHeight) )
				printf("imagenet-console:  failed to save output image to '%s'\n", outputFilename);
			else
				printf("imagenet-console:  completed saving '%s'\n", outputFilename);
		}
	}
	else
		printf("imagenet-console:  failed to classify '%s'  (result=%i)\n", imgFilename, img_class);
	
	printf("\nshutting down...\n");
	CUDA(cudaFreeHost(imgCPU));
	delete net;
	return 0;
}
Esempio n. 9
0
static void mikk_set_tangent_space(const SMikkTSpaceContext *context, const float T[], const float sign, const int face, const int vert)
{
	MikkUserData *userdata = (MikkUserData*)context->m_pUserData;

	userdata->tangent[face*4 + vert] = make_float4(T[0], T[1], T[2], sign);
}
Esempio n. 10
0
/* Computes a 2D range (min x, min y, max x, max y) covering the given
 * intersection points after transforming them by rkViewPrj.
 *
 * uiIntersectionCount: number of entries of akIntersections to process.
 * akIntersections:     points to transform; each is multiplied by rkViewPrj
 *                      and divided by its w component.
 * rkViewPrj:           matrix applied to every point.
 *
 * Returns make_float4(min.x, min.y, max.x, max.y). With no points the range
 * is (-1,-1)..(+1,+1). A range that is degenerate on an axis (extent below
 * 1e-6) is reset to -1..+1 on that axis.
 *
 * The bounds are accumulated while projecting, so no fixed-size scratch array
 * is written and a large uiIntersectionCount cannot overflow local storage. */
float4 
Projector::findProjectedRange(
    uint uiIntersectionCount,
    float4 akIntersections[32],
    const float16 &rkViewPrj)
{
    GLint aiViewport[4];
    glGetIntegerv(GL_VIEWPORT, aiViewport);

    /* start with an inverted range so the first point initialises both bounds */
    float2 kMin = make_float2(+1.0f, +1.0f);
    float2 kMax = make_float2(-1.0f, -1.0f);

    for( uint i = 0; i < uiIntersectionCount; i++)
    {
        float4 kDW = rkViewPrj * akIntersections[i];
        kDW /= kDW.w;

        /* remap x/y relative to the viewport origin and size; evaluated in
         * double precision and then narrowed to float */
        const float fX = (2.0 * (kDW.x - aiViewport[0]) / aiViewport[2]) - 1.0;
        const float fY = (2.0 * (kDW.y - aiViewport[1]) / aiViewport[3]) - 1.0;

        if(fX < kMin.x) kMin.x = fX;
        if(fY < kMin.y) kMin.y = fY;

        if(fX > kMax.x) kMax.x = fX;
        if(fY > kMax.y) kMax.y = fY;
    }

    if(uiIntersectionCount > 0)
    {
        /* pad the range slightly */
        kMin.x -= 0.01f;
        kMin.y -= 0.01f;

        kMax.x += 0.01f;
        kMax.y += 0.01f;
    }
    else
    {
        kMin = make_float2(-1.0f, -1.0f);
        kMax = make_float2(+1.0f, +1.0f);
    }

    if(kMin.x < -1.0f) kMin.x = -1.0f;
    if(kMin.y < -1.0f) kMin.y = -1.0f;

    /* NOTE(review): this raises kMax to at least +1 rather than capping it at
     * +1, which is asymmetric to the kMin clamp above. Kept as in the original
     * because callers may depend on the resulting range - confirm intent. */
    if(kMax.x < +1.0f) kMax.x = +1.0f;
    if(kMax.y < +1.0f) kMax.y = +1.0f;

    if(fabs(kMin.x - kMax.x) < 0.000001)
    {
        kMin.x = -1.0;
        kMax.x = +1.0;
    }

    if(fabs(kMin.y - kMax.y) < 0.000001)
    {
        kMin.y = -1.0;
        kMax.y = +1.0;
    }

    float4 kRange = make_float4(kMin.x, kMin.y, kMax.x, kMax.y);
    return kRange;
}
// Transfers the vortex particle data onto the grid and prepares the
// right-hand-side arrays, one per component.
//
// Steps, as performed below:
//   1. configure the vortex spatial hasher and hash the particle positions,
//   2. reorder the positions and the per-component particle values,
//   3. for each component: zero the grid array, call ParticleToMesh, copy the
//      result into m_grid_Rhs[c] on the device and call ComputeRHS on it.
// Always returns true; the result of cudaMemcpy is not checked.
bool
BiotSavartSolver::m_ParticleToMesh()
{
	// cell size passed to the hasher is m_L divided by the grid resolution in x
	m_SpatialHasher_vort.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx,
		make_float3(m_origin.x,m_origin.y,m_origin.z),
		m_N_vort);
	m_SpatialHasher_vort.setHashParam();
	m_SpatialHasher_vort.doSpatialHash(m_p_vortPos->getDevicePtr(),m_N_vort);

	// reorder particle positions into m_p_vortPos_Reorder
	// NOTE(review): the trailing (4, 1) arguments presumably describe the element layout (4 components, float) - confirm against reorderData
	m_p_vortPos_Reorder->memset(make_float4(0,0,0,0));
	m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_p_vortPos->getDevicePtr()),
		(void*)(m_p_vortPos_Reorder->getDevicePtr()), 4, 1);


	// reorder the per-component particle values the same way
	// NOTE(review): (1, 2) presumably means 1 component of double - confirm against reorderData
	for(int i=0;i<NUM_COMPONENTS;i++)
	{
		m_particle_vort_Reorder[i]->memset(0);
		m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_particle_vort[i]->getDevicePtr()),
			(void*)(m_particle_vort_Reorder[i]->getDevicePtr()), 1, 2);

	}

	for (int c=0;c<NUM_COMPONENTS;c++)
	{
		// accumulate the reordered particle values of component c onto the grid
		m_grid_vort[c]->memset(0);
		ParticleToMesh(m_SpatialHasher_vort.getStartTable(),
			m_SpatialHasher_vort.getEndTable(),
			m_p_vortPos_Reorder->getDevicePtr(),
			m_particle_vort_Reorder[c]->getDevicePtr(),
			m_SpatialHasher_vort.getCellSize().x,
			m_grid_vort[c]->getDevicePtr(),
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint3(m_gridx,m_gridy,m_gridz),
			m_N_vort,
			m_origin);
		// device-to-device copy of the grid values into the RHS array
		cudaMemcpy(m_grid_Rhs[c]->getDevicePtr(),
			m_grid_vort[c]->getDevicePtr(),
			m_grid_Rhs[c]->getSize()*m_grid_Rhs[c]->typeSize(),
			cudaMemcpyDeviceToDevice);
		// ComputeRHS receives the squared cell size, the constant -1.0 and the number of grid cells
		ComputeRHS(m_grid_Rhs[c]->getDevicePtr(),
			m_SpatialHasher_vort.getCellSize().x*m_SpatialHasher_vort.getCellSize().x,
			-1.0,
			m_gridx*m_gridy*m_gridz);
		// Disabled code below: host-side computation of the weighted particle
		// centre and total per component, followed by an applyDirichlet call.
		//m_p_vortPos_Reorder->copy(GpuArrayf4::DEVICE_TO_HOST);
		//m_particle_vort_Reorder[c]->copy(GpuArrayd::DEVICE_TO_HOST);
		//double total_weight = 0;
		//double total_mass = 0;
		//for(int i=0; i<m_N_vort; i++)
		//{
		//	double *host = m_particle_vort_Reorder[c]->getHostPtr();
		//	total_weight += fabs(host[i]);
		//	total_mass += host[i];
		//}
		//double cx=0, cy=0, cz=0;
		//for(int i=0; i<m_N_vort; i++)
		//{
		//	float4 *hpos = m_p_vortPos_Reorder->getHostPtr();
		//	double *hmass = m_particle_vort_Reorder[c]->getHostPtr();
		//	cx+=hpos[i].x*fabs(hmass[i]);
		//	cy+=hpos[i].y*fabs(hmass[i]);
		//	cz+=hpos[i].z*fabs(hmass[i]);
		//	//printf("%f,%f,%f\n",cx,cy,cz);
		//}
		//cx=cx/total_weight;
		//cy=cy/total_weight;
		//cz=cz/total_weight;

		//m_center.x = cx;
		//m_center.y = cy;
		//m_center.z = cz;
		//m_total_vort[c] = total_mass;
		////printf("%f,%f,%f,%f\n",cx,cy,cz,total_mass);
		//applyDirichlet(m_grid_Rhs[c]->getDevicePtr(), 
		//	make_double4(cx,cy,cz,0),
		//	total_mass,
		//	m_origin,
		//	m_SpatialHasher_vort.getCellSize().x,
		//	m_gridx,
		//	m_gridy,
		//	m_gridz);
	}


	return true;

}
// File-local helper for initializeSolver: optionally releases and/or
// (re)allocates `array` for `count` elements, then fills it with `zero`.
// The call order is always free() -> alloc(count) -> memset(zero), skipping
// the steps that are switched off.
template<class ArrayT, class CountT, class ValueT>
static void bsPrepareArray(ArrayT *array, CountT count, ValueT zero, bool release, bool allocate)
{
	if(release)
		array->free();
	if(allocate)
		array->alloc(count);
	array->memset(zero);
}

// (Re)configures the solver for a grid of gdx*gdy*gdz cells, M evaluation
// points and N vortex particles.
//
//   - Already initialized with identical sizes: all arrays are only zeroed.
//   - Already initialized with different sizes: the Poisson solver and spatial
//     hashers are set up again, every array is freed, reallocated and zeroed.
//   - First call: same setup, but nothing is freed/ended beforehand.
//
// isVIC and K are stored unconditionally. When isVIC is false, the evaluation
// spatial hasher and the m_far_U arrays are handled as well.
// NOTE(review): if a previous call used a different isVIC value with identical
// sizes, m_far_U is zeroed here without having been allocated by this
// function - confirm callers never switch modes that way.
// Always returns true.
bool
BiotSavartSolver::initializeSolver(uint gdx, uint gdy, uint gdz, bool isVIC, int K, uint M, uint N)
{
	m_isVIC = isVIC;
	m_K = K;

	const bool wasInitialized = m_initialized;
	const bool sameLayout = wasInitialized &&
		gdx==m_gridx && gdy==m_gridy && gdz==m_gridz && M==m_M_eval && N==m_N_vort;

	if(!sameLayout)
	{
		m_gridx = gdx;
		m_gridy = gdy;
		m_gridz = gdz;
		m_M_eval = M;
		m_N_vort = N;

		m_PoissonSolver.m_InitialSystem(m_gridx, m_gridy, m_gridz);

		// hashers set up by an earlier call are ended before being initialized again
		if(!m_isVIC)
		{
			if(wasInitialized)
				m_SpatialHasher_eval.endSpatialHash();
			m_SpatialHasher_eval.initSpatialHash(m_M_eval, m_gridx,m_gridy,m_gridz);
		}
		if(wasInitialized)
			m_SpatialHasher_vort.endSpatialHash();
		m_SpatialHasher_vort.initSpatialHash(m_N_vort, m_gridx,m_gridy,m_gridz);
	}

	// identical sizes: zero only; changed sizes: free + alloc + zero; first call: alloc + zero
	const bool release = wasInitialized && !sameLayout;
	const bool allocate = !sameLayout;

	// position arrays
	bsPrepareArray(m_evalPos, m_M_eval, make_float4(0,0,0,0), release, allocate);
	bsPrepareArray(m_evalPos_Reorder, m_M_eval, make_float4(0,0,0,0), release, allocate);
	bsPrepareArray(m_p_vortPos, m_N_vort, make_float4(0,0,0,0), release, allocate);
	bsPrepareArray(m_p_vortPos_Reorder, m_N_vort, make_float4(0,0,0,0), release, allocate);

	// per-component arrays: grid-sized, particle-sized (N) or evaluation-sized (M)
	for (int i=0;i<NUM_COMPONENTS;i++)
	{
		bsPrepareArray(m_grid_Rhs[i], m_gridx*m_gridy*m_gridz, 0, release, allocate);
		bsPrepareArray(m_particle_vort[i], m_N_vort, 0, release, allocate);
		bsPrepareArray(m_particle_vort_Reorder[i], m_N_vort, 0, release, allocate);
		bsPrepareArray(m_grid_vort[i], m_gridx*m_gridy*m_gridz, 0, release, allocate);
		bsPrepareArray(m_grid_Psi[i], m_gridx*m_gridy*m_gridz, 0, release, allocate);
		bsPrepareArray(m_particle_U[i], m_M_eval, 0, release, allocate);
		bsPrepareArray(m_particle_U_deorder[i], m_M_eval, 0, release, allocate);
		bsPrepareArray(m_grid_U[i], m_gridx*m_gridy*m_gridz, 0, release, allocate);

		if(!m_isVIC)
			bsPrepareArray(m_far_U[i], m_gridx*m_gridy*m_gridz, 0, release, allocate);
	}

	m_initialized = true;

	return true;
}
// Evaluates the velocity at the evaluation points into m_particle_U[0..2].
//
// another_end: second set of positions; only read when is_segment == 1 and the
//              solver is not in VIC mode, where it is reordered into a
//              temporary array and handed to BiotSavartPPCorrScaleMN.
// is_segment:  forwarded to BiotSavartPPCorrScaleMN; the value 1 additionally
//              triggers the reordering of another_end.
//
// Non-VIC mode: evaluation points are hashed and reordered, velocities are
// interpolated from m_far_U, corrected by BiotSavartPPCorrScaleMN and written
// back in the original particle order. VIC mode: velocities are interpolated
// directly from m_grid_U at the unordered evaluation points.
// Always returns true.
bool BiotSavartSolver::evaluateVelocity( GpuArrayf4 *another_end, uint is_segment )
{

	//m_p_vortPos->copy(gf_GpuArray<float4>::DEVICE_TO_HOST);
	//setDomain(m_origin, m_p_vortPos->getHostPtr(),m_N_vort,m_L);
	////printf("%f,%f,%f,%f,\n",m_origin.x,m_origin.y,m_origin.z,m_L);
	//m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx,
	//	make_float3(m_origin.x,m_origin.y,m_origin.z),
	//	m_M_eval);
	//m_SpatialHasher_eval.setHashParam();
	//m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval);
	//m_SpatialHasher_eval.reorderData(m_M_eval,m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1);


	//m_ParticleToMesh();
	//m_SolvePoisson();
	//m_ComputeCurl();
	//m_Intepolate();
	//m_LocalCorrection(another_end);
	//m_unsortResult();

	// scratch array for the reordered `another_end` positions; released at the end
	GpuArrayf4 *temp_pos=new GpuArrayf4;
	temp_pos->alloc(m_M_eval);
	temp_pos->memset(make_float4(0,0,0,0));

	for(int i=0;i<NUM_COMPONENTS;i++)
	{
		m_particle_U[i]->memset(0);
		m_particle_U_deorder[i]->memset(0);
	}
	if(!m_isVIC){
		
		// hash and reorder the evaluation points
		m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx,
			make_float3(m_origin.x,m_origin.y,m_origin.z),
			m_M_eval);
		m_SpatialHasher_eval.setHashParam();
		m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval);
		m_SpatialHasher_eval.reorderData(m_M_eval, m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1);
		if(is_segment==1)
		{
			m_SpatialHasher_eval.reorderData(m_M_eval,another_end->getDevicePtr(),temp_pos->getDevicePtr(),4,1);
		}

		// interpolate from m_far_U into the reordered velocity arrays
		BiotSavartInterpolateFarField(m_evalPos_Reorder->getDevicePtr(),
			m_far_U[0]->getDevicePtr(),m_far_U[1]->getDevicePtr(), m_far_U[2]->getDevicePtr(),
			m_particle_U_deorder[0]->getDevicePtr(), m_particle_U_deorder[1]->getDevicePtr(),m_particle_U_deorder[2]->getDevicePtr(),
			m_SpatialHasher_vort.getCellSize().x,
			m_gridx,m_gridy,m_gridz,
			m_M_eval,
			m_origin);
		BiotSavartPPCorrScaleMN(m_SpatialHasher_vort.getStartTable(),
			m_SpatialHasher_vort.getEndTable(),
			m_evalPos_Reorder->getDevicePtr(),
			temp_pos->getDevicePtr(),
			is_segment,
			m_p_vortPos_Reorder->getDevicePtr(),
			m_particle_vort_Reorder[0]->getDevicePtr(),
			m_particle_vort_Reorder[1]->getDevicePtr(),
			m_particle_vort_Reorder[2]->getDevicePtr(),
			m_particle_U_deorder[0]->getDevicePtr(),
			m_particle_U_deorder[1]->getDevicePtr(),
			m_particle_U_deorder[2]->getDevicePtr(),
			m_grid_U[0]->getDevicePtr(),
			m_grid_U[1]->getDevicePtr(),
			m_grid_U[2]->getDevicePtr(),
			m_SpatialHasher_vort.getCellSize().x,
			make_uint3(m_gridx,m_gridy,m_gridz),
			make_uint3(m_gridx,m_gridy,m_gridz),
			m_K,
			m_M_eval,
			m_N_vort,
			m_origin);
		// write the results back in the original (unsorted) order
		for (int c=0;c<3;c++)
		{
			m_SpatialHasher_eval.deorderData(m_M_eval,m_particle_U_deorder[c]->getDevicePtr(),m_particle_U[c]->getDevicePtr(),1,2);
		}
		
	}
	else
	{
		
		BiotSavartInterpolateFarField(m_evalPos->getDevicePtr(),
			m_grid_U[0]->getDevicePtr(),m_grid_U[1]->getDevicePtr(), m_grid_U[2]->getDevicePtr(),
			m_particle_U[0]->getDevicePtr(), m_particle_U[1]->getDevicePtr(),m_particle_U[2]->getDevicePtr(),
			m_SpatialHasher_vort.getCellSize().x,
			m_gridx,m_gridy,m_gridz,
			m_M_eval,
			m_origin);
		
	}

	//BiotSavartComputeVelocityForOutParticle(m_evalPos->getDevicePtr(),
	//	make_double3(m_total_vort[0],m_total_vort[1],m_total_vort[2]), 
	//	m_center,
	//	m_SpatialHasher_vort.getWorldOrigin(), 
	//	make_float3(m_SpatialHasher_vort.getWorldOrigin().x+m_L,
	//				 m_SpatialHasher_vort.getWorldOrigin().y+m_L,
	//				 m_SpatialHasher_vort.getWorldOrigin().z+m_L),
	//    m_particle_U[0]->getDevicePtr(),
	//	m_particle_U[1]->getDevicePtr(),
	//	m_particle_U[2]->getDevicePtr(),
	//	m_M_eval);

	// release the array's storage, then the array object itself; the object
	// was created with `new` above and would otherwise leak on every call
	temp_pos->free();
	delete temp_pos;

	return true;

}
Esempio n. 14
0
// Builds a float4 pixel from four Ncv32f components by forwarding them,
// in order, to make_float4.
static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w)
{
    const float4 pixel = make_float4(x, y, z, w);
    return pixel;
}
void SSShadowMapDemo::renderPost()
{
	Stopwatch sw( m_deviceData );
	if(1)
	{
		sw.start();

		ConstData cb;
		{
			XMVECTOR v;
//			cb.m_viewInv = XMMatrixInverse( &v, g_ViewTr );
//			cb.m_projInv = XMMatrixInverse( &v, g_ProjectionTr );
			cb.m_viewInv = XMMatrixInverse( &v, XMMatrixMultiply( g_ProjectionTr, g_ViewTr ) );

			cb.m_width = g_wWidth;
			cb.m_height = g_wHeight;
			cb.m_shadowWeight = 0.6f/MAX_SHADOWS;
		}

		{	//	clear
			Buffer<int> normalBuffer; normalBuffer.m_srv = m_normalRT.m_srv;
			BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), 
				BufferInfo( &normalBuffer, true ) };
			Launcher launcher( m_deviceData, &m_clearKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
			launcher.setConst( m_constBuffer, cb );
			launcher.launch1D( g_wWidth*g_wHeight, 64 );
		}

		ID3D11RenderTargetView* m_rtv;
		ID3D11DepthStencilView* m_dsv;
		DeviceDX11* dd = (DeviceDX11*)m_deviceData;
		dd->m_context->OMGetRenderTargets( 1, &m_rtv, &m_dsv );

		for(int lightIdx=0; lightIdx<MAX_SHADOWS; lightIdx++)
		{
			XMMATRIX viewTr;
			XMMATRIX projTr;
			{	//	render light view
				dd->m_context->OMSetRenderTargets( 0, 0, m_depthBuffer[lightIdx].m_depthStencilView );
				dd->m_context->ClearDepthStencilView( m_depthBuffer[lightIdx].m_depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 );

				getMatrices<true>( m_lightPos[lightIdx], make_float4(0,0,0), make_float4(1,0,0,0), XM_PI*60.f/180.f, 1.f, 
					0.1f, 50.f, &viewTr, &projTr );
		
				dispatchRenderList( g_deviceData, &g_debugRenderObj, &viewTr, &projTr );
			}

			{	//	run compute shader for accumulation
				dd->m_context->OMSetRenderTargets( 0, 0, 0 );

				{
//					cb.m_lightView = viewTr;
//					cb.m_lightProj = projTr;
					//	== mul( mul( v, view ), proj ) in shader (Matrices are transposed)
					cb.m_lightView = XMMatrixMultiply( projTr, viewTr );
				}

				Buffer<int> depthBuffer;	depthBuffer.m_srv = g_depthStencil.m_srv;
				Buffer<int> shadowBuffer;	shadowBuffer.m_srv = m_depthBuffer[lightIdx].m_srv;

				BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), 
					BufferInfo( &depthBuffer, true ), BufferInfo( &shadowBuffer, true ) };

				Launcher launcher( m_deviceData, &m_shadowAccmKernel );
				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
				launcher.setConst( m_constBuffer, cb );
				launcher.launch2D( g_wWidth, g_wHeight, 8, 8 );
			}
		}

		m_rtv->Release();
		m_dsv->Release();
	}
	else
	{
		ConstData cb;
		{
			XMVECTOR v;
//			cb.m_viewInv = XMMatrixInverse( &v, g_ViewTr );
//			cb.m_projInv = XMMatrixInverse( &v, g_ProjectionTr );
			cb.m_viewInv = XMMatrixInverse( &v, XMMatrixMultiply( g_ProjectionTr, g_ViewTr ) );

			cb.m_width = g_wWidth;
			cb.m_height = g_wHeight;
			cb.m_shadowWeight = 0.6f/MAX_SHADOWS;
		}

		{	//	clear
			Buffer<int> normalBuffer; normalBuffer.m_srv = m_normalRT.m_srv;
			BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), 
				BufferInfo( &normalBuffer, true ) };
			Launcher launcher( m_deviceData, &m_clearKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
			launcher.setConst( m_constBuffer, cb );
			launcher.launch1D( g_wWidth*g_wHeight, 64 );
		}

		ID3D11RenderTargetView* m_rtv;
		ID3D11DepthStencilView* m_dsv;
		DeviceDX11* dd = (DeviceDX11*)m_deviceData;
		dd->m_context->OMGetRenderTargets( 1, &m_rtv, &m_dsv );

		XMMATRIX viewTr[MAX_SHADOWS];
		XMMATRIX projTr[MAX_SHADOWS];
		for(int lightIdx=0; lightIdx<MAX_SHADOWS; lightIdx++)
		{
			{	//	render light view
				dd->m_context->OMSetRenderTargets( 0, 0, m_depthBuffer[lightIdx].m_depthStencilView );
				dd->m_context->ClearDepthStencilView( m_depthBuffer[lightIdx].m_depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 );

				getMatrices<true>( m_lightPos[lightIdx], make_float4(0,0,0), make_float4(1,0,0,0), XM_PI*60.f/180.f, 1.f, 
					0.1f, 50.f, &viewTr[lightIdx], &projTr[lightIdx] );
		
				dispatchRenderList( g_deviceData, &g_debugRenderObj, &viewTr[lightIdx], &projTr[lightIdx] );
			}

			{	//	run compute shader for accumulation
				dd->m_context->OMSetRenderTargets( 0, 0, 0 );

				cb.m_shadowIdx = lightIdx;

				Buffer<int> shadowBuffer;	shadowBuffer.m_srv = m_depthBuffer[lightIdx].m_srv;

				BufferInfo bInfo[] = { BufferInfo( &m_lightMergedBuffer ),
					BufferInfo( &shadowBuffer, true ) };

				Launcher launcher( m_deviceData, &m_copyShadowMapKernel );
				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
				launcher.setConst( m_constBuffer, cb );
				launcher.launch1D( g_wWidth*g_wHeight, 64 );
			}
		}


		{	//	resolve in a shader
			{
				cb.m_shadowIdx = MAX_SHADOWS;
				for(int i=0; i<MAX_SHADOWS; i++)
				{
					viewTr[i] = XMMatrixMultiply( projTr[i], viewTr[i] );
				}
				m_lightMatrixBuffer.write( viewTr, MAX_SHADOWS );
				DeviceUtils::waitForCompletion( m_deviceData );
			}

			sw.start();

			Buffer<int> depthBuffer;	depthBuffer.m_srv = g_depthStencil.m_srv;
			Buffer<int> shadowBuffer;	shadowBuffer.m_srv = m_depthBuffer[0].m_srv;

			BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), 
				BufferInfo( &depthBuffer, true ), 
				BufferInfo( &m_lightMergedBuffer, true ),
				BufferInfo( &m_lightMatrixBuffer, true ) };

			Launcher launcher( m_deviceData, &m_shadowAccmAllKernel );
			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) );
			launcher.setConst( m_constBuffer, cb );
			launcher.launch2D( g_wWidth, g_wHeight, 8, 8 );

		}

		m_rtv->Release();
		m_dsv->Release();
	}
	sw.stop();

	{
		float t = sw.getMs();
		m_nTxtLines = 0;
		sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "%dlights, %3.2fms", MAX_SHADOWS, t);
	}

	resolve( &m_shadowAccumBuffer.m_srv );
//	resolveTexture( (void**)&m_colorRT.m_srv );
}
Esempio n. 16
0
// Runs one solver pass over the four contact slots of a constraint.
//
// Slots whose m_jacCoeffInv is exactly zero are skipped. For every other slot
// the impulse increment is (relative velocity + m_b) * m_jacCoeffInv; the
// accumulated impulse m_appliedRambdaDt is clamped to
// [minRambdaDt, maxRambdaDt] and only the clamped difference is applied.
//
// cs                         constraint; m_appliedRambdaDt is updated in place
// posA/posB                  body positions used to build the contact arms
// linVelA/angVelA (and B)    body velocities, updated in place
// invMassA/invInertiaA (B)   inverse mass and inverse inertia of each body
// maxRambdaDt/minRambdaDt    per-slot bounds on the accumulated impulse
//
// When JACOBI is set, velocity changes are summed over all slots and applied
// once after the loop; otherwise each slot updates the velocities immediately,
// so later slots see the effect of earlier ones.
static
__inline
void solveContact(Constraint4& cs,
                  const float4& posA, float4& linVelA, float4& angVelA, float invMassA, const Matrix3x3& invInertiaA,
                  const float4& posB, float4& linVelB, float4& angVelB, float invMassB, const Matrix3x3& invInertiaB,
                  float maxRambdaDt[4], float minRambdaDt[4])
{
    float4 sumLinA = make_float4(0.f);
    float4 sumAngA = make_float4(0.f);
    float4 sumLinB = make_float4(0.f);
    float4 sumAngB = make_float4(0.f);

    // Where each slot's velocity change lands: the deferred sums under JACOBI,
    // the live body velocities otherwise.
    float4& outLinA = JACOBI ? sumLinA : linVelA;
    float4& outAngA = JACOBI ? sumAngA : angVelA;
    float4& outLinB = JACOBI ? sumLinB : linVelB;
    float4& outAngB = JACOBI ? sumAngB : angVelB;

    for(int slot=0; slot<4; slot++)
    {
        // A zero coefficient would scale the increment to zero anyway.
        if( cs.m_jacCoeffInv[slot] == 0.f ) continue;

        float4 angA, angB, lin;
        float4 armA = cs.m_worldPos[slot] - posA;
        float4 armB = cs.m_worldPos[slot] - posB;
        setLinearAndAngular( -cs.m_linear, armA, armB, lin, angA, angB );

        float impulse = calcRelVel(cs.m_linear, -cs.m_linear, angA, angB,
                                   linVelA, angVelA, linVelB, angVelB ) + cs.m_b[slot];
        impulse *= cs.m_jacCoeffInv[slot];

        // Clamp the accumulated impulse, then keep only the part actually added.
        const float before = cs.m_appliedRambdaDt[slot];
        float total = before + impulse;
        total = min2( max2( total, minRambdaDt[slot] ), maxRambdaDt[slot] );
        impulse = total - before;
        cs.m_appliedRambdaDt[slot] = total;

        float4 dvLinA = invMassA*lin*impulse;
        float4 dvLinB = invMassB*(-lin)*impulse;
        float4 dvAngA = mtMul1(invInertiaA, angA)*impulse;
        float4 dvAngB = mtMul1(invInertiaB, angB)*impulse;
#ifdef _WIN32
        btAssert(_finite(dvLinA.x));
        btAssert(_finite(dvLinB.x));
#endif
        outLinA += dvLinA;
        outAngA += dvAngA;
        outLinB += dvLinB;
        outAngB += dvAngB;
    }

    if( JACOBI )
    {
        linVelA += sumLinA;
        angVelA += sumAngA;
        linVelB += sumLinB;
        angVelB += sumAngB;
    }
}
Esempio n. 17
0
// Loads a Kinect frame, runs the knt_cuda_* pipeline on it (setup, allocation,
// upload, normal estimation, grid update, download, raycast), and shows the
// raycast image in a QImageWidget. Each stage is timed and printed through Timer.
//
// argc/argv are only forwarded to QApplication.
// Returns the value of QApplication::exec().
//
// NOTE(review): vx_count, vx_size, filepath and aspect_ratio are not declared
// in this function; presumably they are file-scope variables — confirm.
int volumetric_knt_cuda(int argc, char **argv)
{
	Timer timer;
	int vol_size = vx_count * vx_size;
	float half_vol_size = vol_size * 0.5f;

	// The grid is cubic: the same count/size is used on all three axes.
	Eigen::Vector3i voxel_size(vx_size, vx_size, vx_size);
	Eigen::Vector3i volume_size(vol_size, vol_size, vol_size);
	Eigen::Vector3i voxel_count(vx_count, vx_count, vx_count);
	int total_voxels = voxel_count.x() * voxel_count.y() * voxel_count.z();


	std::cout << std::fixed
		<< "Voxel Count  : " << voxel_count.transpose() << std::endl
		<< "Voxel Size   : " << voxel_size.transpose() << std::endl
		<< "Volume Size  : " << volume_size.transpose() << std::endl
		<< "Total Voxels : " << total_voxels << std::endl
		<< std::endl;

	timer.start();
	KinectFrame knt(filepath);
	timer.print_interval("Importing knt frame : ");

	// Grid transform: translate by half the volume size along +z.
	Eigen::Affine3f grid_affine = Eigen::Affine3f::Identity();
	grid_affine.translate(Eigen::Vector3f(0, 0, half_vol_size));
	grid_affine.scale(Eigen::Vector3f(1, 1, 1));	// z is negative inside of screen
	Eigen::Matrix4f grid_matrix = grid_affine.matrix();

	float knt_near_plane = 0.1f;
	float knt_far_plane = 10240.0f;
	Eigen::Matrix4f projection = perspective_matrix<float>(KINECT_V2_FOVY, KINECT_V2_DEPTH_ASPECT_RATIO, knt_near_plane, knt_far_plane);
	Eigen::Matrix4f projection_inverse = projection.inverse();
	Eigen::Matrix4f view_matrix = Eigen::Matrix4f::Identity();

	// Host buffers: one vertex and one normal per depth sample, and one
	// two-float parameter entry per voxel.
	std::vector<float4> vertices(knt.depth.size(), make_float4(0, 0, 0, 1));
	std::vector<float4> normals(knt.depth.size(), make_float4(0, 0, 1, 1));
	std::vector<Eigen::Vector2f> grid_voxels_params(total_voxels);

	// 
	// setup image parameters
	//
	// NOTE(review): the height is derived from aspect_ratio here, while
	// knt_cuda_setup and the widget below also receive KINECT_V2_DEPTH_HEIGHT —
	// confirm the two always agree.
	unsigned short image_width = KINECT_V2_DEPTH_WIDTH;
	unsigned short image_height = image_width / aspect_ratio;
	QImage img(image_width, image_height, QImage::Format::Format_RGBA8888);
	img.fill(Qt::GlobalColor::gray);
	uchar4* image_data = (uchar4*)img.bits();
	//float4* debug_buffer = new float4[image_width * image_height];
	//memset(debug_buffer, 0, image_width * image_height * sizeof(float4));

	// The host buffers are passed as their dereferenced first element.
	// NOTE(review): presumably knt_cuda_setup takes these by reference and keeps
	// the addresses for the later copy calls — confirm; if so the vectors must
	// stay alive and un-reallocated until knt_cuda_free().
	knt_cuda_setup(
		vx_count, vx_size,
		grid_matrix.data(),
		projection.data(),
		projection_inverse.data(),
		*grid_voxels_params.data()->data(),
		KINECT_V2_DEPTH_WIDTH,
		KINECT_V2_DEPTH_HEIGHT,
		KINECT_V2_DEPTH_MIN,
		KINECT_V2_DEPTH_MAX,
		vertices.data()[0],
		normals.data()[0],
		image_width,
		image_height
		);

	timer.start();
	knt_cuda_allocate();
	knt_cuda_init_grid();
	timer.print_interval("Allocating gpu      : ");

	timer.start();
	knt_cuda_copy_host_to_device();
	knt_cuda_copy_depth_buffer_to_device(knt.depth.data());
	timer.print_interval("Copy host to device : ");

	timer.start();
	knt_cuda_normal_estimation();
	timer.print_interval("Normal estimation   : ");

	timer.start();
	knt_cuda_update_grid(view_matrix.data());
	timer.print_interval("Update grid         : ");

	timer.start();
	knt_cuda_grid_params_copy_device_to_host();
	knt_cuda_copy_device_to_host();
	timer.print_interval("Copy device to host : ");




	//
	// setup camera parameters
	//
	// The "Raycast" interval below also includes building this camera matrix.
	timer.start();
	Eigen::Affine3f camera_to_world = Eigen::Affine3f::Identity();
	float cam_z = -half_vol_size;
	camera_to_world.scale(Eigen::Vector3f(1, 1, -1));
	camera_to_world.translate(Eigen::Vector3f(half_vol_size, half_vol_size, cam_z));

	
	Eigen::Matrix4f camera_to_world_matrix = camera_to_world.matrix();
		
	knt_cuda_raycast(KINECT_V2_FOVY, KINECT_V2_DEPTH_ASPECT_RATIO, camera_to_world_matrix.data());
	timer.print_interval("Raycast             : ");

	// The raycast result is copied straight into img's pixel storage.
	timer.start();
	knt_cuda_copy_image_device_to_host(*(uchar4*)img.bits());
	timer.print_interval("Copy Img to host    : ");
	
	timer.start();
	knt_cuda_free();
	timer.print_interval("Cleanup gpu         : ");

	// Disabled CPU reference raycaster; with "#if 0" only the (comment-only)
	// #else part is compiled.
#if 0
	//memset(image_data, 0, image_width * image_height * sizeof(uchar4));
	//memset(debug_buffer, 0, image_width * image_height * sizeof(float4));

	Eigen::Vector3f camera_pos = camera_to_world_matrix.col(3).head<3>();
	float fov_scale = (float)tan(DegToRad(KINECT_V2_FOVY * 0.5f));
	float aspect_ratio = KINECT_V2_DEPTH_ASPECT_RATIO;


	//
	// for each pixel, trace a ray
	//
	timer.start();
	for (int y = 0; y < image_height; ++y)
	{
		for (int x = 0; x < image_width; ++x)
		{
			// Convert from image space (in pixels) to screen space
			// Screen Space along X axis = [-aspect ratio, aspect ratio] 
			// Screen Space along Y axis = [-1, 1]
			// NOTE(review): (2*x + 0.5)/w may have been meant as 2*(x + 0.5)/w
			// (pixel-centre sampling) — confirm before re-enabling this code.
			float x_norm = (2.f * float(x) + 0.5f) / (float)image_width;
			float y_norm = (2.f * float(y) + 0.5f) / (float)image_height;
			Eigen::Vector3f screen_coord(
				(x_norm - 1.f) * aspect_ratio * fov_scale,
				(1.f - y_norm) * fov_scale,
				1.0f);

			Eigen::Vector3f direction;
			multDirMatrix(screen_coord, camera_to_world_matrix, direction);
			direction.normalize();

			long voxels_zero_crossing[2] = { -1, -1 };

			int hit_count = raycast_tsdf_volume<float>(
				camera_pos,
				direction,
				voxel_count.cast<int>(),
				voxel_size.cast<int>(),
				grid_voxels_params,
				voxels_zero_crossing);

			// Colour coding: hit_count == 2 -> normal mapped to RGB,
			// other positive counts -> (128,128,0), no hit -> (128,0,0).
			if (hit_count > 0)
			{
				if (hit_count == 2)
				{
					float4 n = normals[y * image_width + x];

					//image_data[y * image_width + x].x = 0;
					//image_data[y * image_width + x].y = 128;
					//image_data[y * image_width + x].z = 128;
					//image_data[y * image_width + x].w = 255;
					
					image_data[y * image_width + x].x = uchar((n.x * 0.5f + 0.5f) * 255);
					image_data[y * image_width + x].y = uchar((n.y * 0.5f + 0.5f) * 255);
					image_data[y * image_width + x].z = uchar((n.z * 0.5f + 0.5f) * 255);
					image_data[y * image_width + x].w = 255;
				}
				else
				{
					image_data[y * image_width + x].x = 128;
					image_data[y * image_width + x].y = 128;
					image_data[y * image_width + x].z = 0;
					image_data[y * image_width + x].w = 255;
				}
			}
			else
			{
				image_data[y * image_width + x].x = 128;
				image_data[y * image_width + x].y = 0;
				image_data[y * image_width + x].z = 0;
				image_data[y * image_width + x].w = 255;
			}
		}
	}
	timer.print_interval("Raycasting to image     : ");
	//export_debug_buffer("../../data/cpu_image_data_screen_coord_f4.txt", debug_buffer, image_width, image_height);
	//export_image_buffer("../../data/cpu_image_data_screen_coord_uc.txt", image_data, image_width, image_height);
#else

	//export_debug_buffer("../../data/gpu_image_data_screen_coord_f4.txt", debug_buffer, image_width, image_height);
	//export_image_buffer("../../data/gpu_image_data_screen_coord_uc.txt", image_data, image_width, image_height);
#endif

	



	// `image` is built over image_data, i.e. the pixel storage of `img`, rather
	// than over a buffer of its own, so `img` has to outlive it (it does: both
	// are locals of this function).
	QImage image(&image_data[0].x, image_width, image_height, QImage::Format_RGBA8888);
	//image.fill(Qt::GlobalColor::black);
	QApplication app(argc, argv);
	QImageWidget widget;
	widget.resize(KINECT_V2_DEPTH_WIDTH, KINECT_V2_DEPTH_HEIGHT);
	widget.setImage(image);
	widget.show();

	return app.exec();
}