/* Host-side convenience overload: builds a CUDA float4 from the x/y/z/w
 * members of a Vector4f. */
__host__ float4 make_float4( const Vector4f& v )
{
    const float4 converted = make_float4( v.x, v.y, v.z, v.w );
    return converted;
}
/**
 * Explicit specialization of _pixMakeZero for float4: returns a pixel with
 * all four channels set to 0.f.
 *
 * No storage-class specifier is written here on purpose: an explicit
 * specialization may not carry one (its linkage follows the primary
 * template), and strict compilers reject `template<> static`.
 */
template<> inline __host__ __device__ float4 _pixMakeZero<float4>()
{
    return make_float4(0.f, 0.f, 0.f, 0.f);
}
void Camera::device_update(Device *device, DeviceScene *dscene, Scene *scene) { Scene::MotionType need_motion = scene->need_motion(device->info.advanced_shading); update(); if(previous_need_motion != need_motion) { /* scene's motion model could have been changed since previous device * camera update this could happen for example in case when one render * layer has got motion pass and another not */ need_device_update = true; } if(!need_device_update) return; KernelCamera *kcam = &dscene->data.cam; /* store matrices */ kcam->screentoworld = screentoworld; kcam->rastertoworld = rastertoworld; kcam->rastertocamera = rastertocamera; kcam->cameratoworld = cameratoworld; kcam->worldtocamera = worldtocamera; kcam->worldtoscreen = worldtoscreen; kcam->worldtoraster = worldtoraster; kcam->worldtondc = worldtondc; /* camera motion */ kcam->have_motion = 0; kcam->have_perspective_motion = 0; if(need_motion == Scene::MOTION_PASS) { /* TODO(sergey): Support perspective (zoom, fov) motion. */ if(type == CAMERA_PANORAMA) { if(use_motion) { kcam->motion.pre = transform_inverse(motion.pre); kcam->motion.post = transform_inverse(motion.post); } else { kcam->motion.pre = kcam->worldtocamera; kcam->motion.post = kcam->worldtocamera; } } else { if(use_motion) { kcam->motion.pre = cameratoraster * transform_inverse(motion.pre); kcam->motion.post = cameratoraster * transform_inverse(motion.post); } else { kcam->motion.pre = worldtoraster; kcam->motion.post = worldtoraster; } } } #ifdef __CAMERA_MOTION__ else if(need_motion == Scene::MOTION_BLUR) { if(use_motion) { transform_motion_decompose((DecompMotionTransform*)&kcam->motion, &motion, &matrix); kcam->have_motion = 1; } if(use_perspective_motion) { kcam->perspective_motion = perspective_motion; kcam->have_perspective_motion = 1; } } #endif /* depth of field */ kcam->aperturesize = aperturesize; kcam->focaldistance = focaldistance; kcam->blades = (blades < 3)? 
0.0f: blades; kcam->bladesrotation = bladesrotation; /* motion blur */ #ifdef __CAMERA_MOTION__ kcam->shuttertime = (need_motion == Scene::MOTION_BLUR) ? shuttertime: -1.0f; if(need_motion == Scene::MOTION_BLUR) { vector<float> shutter_table; util_cdf_inverted(SHUTTER_TABLE_SIZE, 0.0f, 1.0f, function_bind(shutter_curve_eval, _1, shutter_curve), false, shutter_table); shutter_table_offset = scene->lookup_tables->add_table(dscene, shutter_table); kcam->shutter_table_offset = (int)shutter_table_offset; } else if(shutter_table_offset != TABLE_OFFSET_INVALID) { scene->lookup_tables->remove_table(shutter_table_offset); shutter_table_offset = TABLE_OFFSET_INVALID; } #else kcam->shuttertime = -1.0f; #endif /* type */ kcam->type = type; /* anamorphic lens bokeh */ kcam->inv_aperture_ratio = 1.0f / aperture_ratio; /* panorama */ kcam->panorama_type = panorama_type; kcam->fisheye_fov = fisheye_fov; kcam->fisheye_lens = fisheye_lens; kcam->equirectangular_range = make_float4(longitude_min - longitude_max, -longitude_min, latitude_min - latitude_max, -latitude_min + M_PI_2_F); /* sensor size */ kcam->sensorwidth = sensorwidth; kcam->sensorheight = sensorheight; /* render size */ kcam->width = width; kcam->height = height; kcam->resolution = resolution; /* store differentials */ kcam->dx = float3_to_float4(dx); kcam->dy = float3_to_float4(dy); /* clipping */ kcam->nearclip = nearclip; kcam->cliplength = (farclip == FLT_MAX)? FLT_MAX: farclip - nearclip; /* Camera in volume. */ kcam->is_inside_volume = 0; /* Rolling shutter effect */ kcam->rolling_shutter_type = rolling_shutter_type; kcam->rolling_shutter_duration = rolling_shutter_duration; previous_need_motion = need_motion; }
int main(int argc, char** argv) { std::cout << "Starting iu_image_gpu_unittest ..." << std::endl; // test image size IuSize sz(79,63); iu::ImageGpu_8u_C1 im_gpu_8u_C1(sz); iu::ImageGpu_8u_C2 im_gpu_8u_C2(sz); iu::ImageGpu_8u_C3 im_gpu_8u_C3(sz); iu::ImageGpu_8u_C4 im_gpu_8u_C4(sz); iu::ImageGpu_32f_C1 im_gpu_32f_C1(sz); iu::ImageGpu_32f_C2 im_gpu_32f_C2(sz); iu::ImageGpu_32f_C3 im_gpu_32f_C3(sz); iu::ImageGpu_32f_C4 im_gpu_32f_C4(sz); unsigned char set_value_8u_C1 = 1; uchar2 set_value_8u_C2 = make_uchar2(2,2); uchar3 set_value_8u_C3 = make_uchar3(3,3,3); uchar4 set_value_8u_C4 = make_uchar4(4,4,4,4); float set_value_32f_C1 = 1.1f; float2 set_value_32f_C2 = make_float2(2.2f); float3 set_value_32f_C3 = make_float3(3.3f); float4 set_value_32f_C4 = make_float4(4.4f); // copy values back to cpu to compare the set values iu::ImageCpu_8u_C1 im_cpu_8u_C1(sz); iu::ImageCpu_8u_C2 im_cpu_8u_C2(sz); iu::ImageCpu_8u_C3 im_cpu_8u_C3(sz); iu::ImageCpu_8u_C4 im_cpu_8u_C4(sz); iu::ImageCpu_32f_C1 im_cpu_32f_C1(sz); iu::ImageCpu_32f_C2 im_cpu_32f_C2(sz); iu::ImageCpu_32f_C3 im_cpu_32f_C3(sz); iu::ImageCpu_32f_C4 im_cpu_32f_C4(sz); // set values on cpu and copy to gpu and back again { std::cout << "Testing copy. setValue on cpu (should work because of previous test) and copy forth and back" << std::endl; iu::setValue(set_value_8u_C1, &im_cpu_8u_C1, im_cpu_8u_C1.roi()); iu::setValue(set_value_8u_C2, &im_cpu_8u_C2, im_cpu_8u_C2.roi()); iu::setValue(set_value_8u_C3, &im_cpu_8u_C3, im_cpu_8u_C3.roi()); iu::setValue(set_value_8u_C4, &im_cpu_8u_C4, im_cpu_8u_C4.roi()); iu::setValue(set_value_32f_C1, &im_cpu_32f_C1, im_cpu_32f_C1.roi()); iu::setValue(set_value_32f_C2, &im_cpu_32f_C2, im_cpu_32f_C2.roi()); iu::setValue(set_value_32f_C3, &im_cpu_32f_C3, im_cpu_32f_C3.roi()); iu::setValue(set_value_32f_C4, &im_cpu_32f_C4, im_cpu_32f_C4.roi()); std::cout << " copy cpu -> gpu ..." 
<< std::endl; iu::copy(&im_cpu_8u_C1, &im_gpu_8u_C1); iu::copy(&im_cpu_8u_C2, &im_gpu_8u_C2); iu::copy(&im_cpu_8u_C3, &im_gpu_8u_C3); iu::copy(&im_cpu_8u_C4, &im_gpu_8u_C4); iu::copy(&im_cpu_32f_C1, &im_gpu_32f_C1); iu::copy(&im_cpu_32f_C2, &im_gpu_32f_C2); iu::copy(&im_cpu_32f_C3, &im_gpu_32f_C3); iu::copy(&im_cpu_32f_C4, &im_gpu_32f_C4); std::cout << " copy gpu -> cpu ..." << std::endl; iu::copy(&im_gpu_8u_C1, &im_cpu_8u_C1); iu::copy(&im_gpu_8u_C2, &im_cpu_8u_C2); iu::copy(&im_gpu_8u_C3, &im_cpu_8u_C3); iu::copy(&im_gpu_8u_C4, &im_cpu_8u_C4); iu::copy(&im_gpu_32f_C1, &im_cpu_32f_C1); iu::copy(&im_gpu_32f_C2, &im_cpu_32f_C2); iu::copy(&im_gpu_32f_C3, &im_cpu_32f_C3); iu::copy(&im_gpu_32f_C4, &im_cpu_32f_C4); std::cout << " check copied values on cpu ..." << std::endl; // check if set values are correct for (unsigned int y = 0; y<sz.height; ++y) { for (unsigned int x = 0; x<sz.width; ++x) { // 8-bit if( *im_cpu_8u_C1.data(x,y) != set_value_8u_C1) return EXIT_FAILURE; if( *im_cpu_8u_C2.data(x,y) != set_value_8u_C2) return EXIT_FAILURE; if( *im_cpu_8u_C3.data(x,y) != set_value_8u_C3) return EXIT_FAILURE; if( *im_cpu_8u_C4.data(x,y) != set_value_8u_C4) return EXIT_FAILURE; // 32-bit if( *im_cpu_32f_C1.data(x,y) != set_value_32f_C1) return EXIT_FAILURE; if( *im_cpu_32f_C2.data(x,y) != set_value_32f_C2) return EXIT_FAILURE; if( *im_cpu_32f_C3.data(x,y) != set_value_32f_C3) return EXIT_FAILURE; if( *im_cpu_32f_C4.data(x,y) != set_value_32f_C4) return EXIT_FAILURE; } } } // set values on gpu { std::cout << "Testing setValue on gpu (implecitely testing copy gpu->cpu) ..." 
<< std::endl; iu::setValue(set_value_8u_C1, &im_gpu_8u_C1, im_gpu_8u_C1.roi()); iu::setValue(set_value_8u_C2, &im_gpu_8u_C2, im_gpu_8u_C2.roi()); iu::setValue(set_value_8u_C3, &im_gpu_8u_C3, im_gpu_8u_C3.roi()); iu::setValue(set_value_8u_C4, &im_gpu_8u_C4, im_gpu_8u_C4.roi()); iu::setValue(set_value_32f_C1, &im_gpu_32f_C1, im_gpu_32f_C1.roi()); iu::setValue(set_value_32f_C2, &im_gpu_32f_C2, im_gpu_32f_C2.roi()); iu::setValue(set_value_32f_C3, &im_gpu_32f_C3, im_gpu_32f_C3.roi()); iu::setValue(set_value_32f_C4, &im_gpu_32f_C4, im_gpu_32f_C4.roi()); std::cout << "Copy gpu images to cpu for checking the set values." << std::endl; iu::copy(&im_gpu_8u_C1, &im_cpu_8u_C1); iu::copy(&im_gpu_8u_C2, &im_cpu_8u_C2); iu::copy(&im_gpu_8u_C3, &im_cpu_8u_C3); iu::copy(&im_gpu_8u_C4, &im_cpu_8u_C4); iu::copy(&im_gpu_32f_C1, &im_cpu_32f_C1); iu::copy(&im_gpu_32f_C2, &im_cpu_32f_C2); iu::copy(&im_gpu_32f_C3, &im_cpu_32f_C3); iu::copy(&im_gpu_32f_C4, &im_cpu_32f_C4); // check if set values are correct for (unsigned int y = 0; y<sz.height; ++y) { for (unsigned int x = 0; x<sz.width; ++x) { // 8-bit if( *im_cpu_8u_C1.data(x,y) != set_value_8u_C1) return EXIT_FAILURE; if( *im_cpu_8u_C2.data(x,y) != set_value_8u_C2) return EXIT_FAILURE; if( *im_cpu_8u_C3.data(x,y) != set_value_8u_C3) return EXIT_FAILURE; if( *im_cpu_8u_C4.data(x,y) != set_value_8u_C4) return EXIT_FAILURE; // 32-bit if( *im_cpu_32f_C1.data(x,y) != set_value_32f_C1) return EXIT_FAILURE; if( *im_cpu_32f_C2.data(x,y) != set_value_32f_C2) return EXIT_FAILURE; if( *im_cpu_32f_C3.data(x,y) != set_value_32f_C3) return EXIT_FAILURE; if( *im_cpu_32f_C4.data(x,y) != set_value_32f_C4) return EXIT_FAILURE; } } } // copy gpu -> gpu test { std::cout << "testing copy gpu -> gpu ..." 
<< std::endl; iu::ImageGpu_8u_C1 cp_gpu_8u_C1(sz); iu::ImageGpu_8u_C2 cp_gpu_8u_C2(sz); iu::ImageGpu_8u_C3 cp_gpu_8u_C3(sz); iu::ImageGpu_8u_C4 cp_gpu_8u_C4(sz); iu::ImageGpu_32f_C1 cp_gpu_32f_C1(sz); iu::ImageGpu_32f_C2 cp_gpu_32f_C2(sz); iu::ImageGpu_32f_C3 cp_gpu_32f_C3(sz); iu::ImageGpu_32f_C4 cp_gpu_32f_C4(sz); iu::copy(&im_gpu_8u_C1, &cp_gpu_8u_C1); iu::copy(&im_gpu_8u_C2, &cp_gpu_8u_C2); iu::copy(&im_gpu_8u_C3, &cp_gpu_8u_C3); iu::copy(&im_gpu_8u_C4, &cp_gpu_8u_C4); iu::copy(&im_gpu_32f_C1, &cp_gpu_32f_C1); iu::copy(&im_gpu_32f_C2, &cp_gpu_32f_C2); iu::copy(&im_gpu_32f_C3, &cp_gpu_32f_C3); iu::copy(&im_gpu_32f_C4, &cp_gpu_32f_C4); iu::copy(&cp_gpu_8u_C1, &im_cpu_8u_C1); iu::copy(&cp_gpu_8u_C2, &im_cpu_8u_C2); iu::copy(&cp_gpu_8u_C3, &im_cpu_8u_C3); iu::copy(&cp_gpu_8u_C4, &im_cpu_8u_C4); iu::copy(&cp_gpu_32f_C1, &im_cpu_32f_C1); iu::copy(&cp_gpu_32f_C2, &im_cpu_32f_C2); iu::copy(&cp_gpu_32f_C3, &im_cpu_32f_C3); iu::copy(&cp_gpu_32f_C4, &im_cpu_32f_C4); // check if set values are correct for (unsigned int y = 0; y<sz.height; ++y) { for (unsigned int x = 0; x<sz.width; ++x) { // 8-bit if( *im_cpu_8u_C1.data(x,y) != set_value_8u_C1) return EXIT_FAILURE; if( *im_cpu_8u_C2.data(x,y) != set_value_8u_C2) return EXIT_FAILURE; if( *im_cpu_8u_C3.data(x,y) != set_value_8u_C3) return EXIT_FAILURE; if( *im_cpu_8u_C4.data(x,y) != set_value_8u_C4) return EXIT_FAILURE; // 32-bit if( *im_cpu_32f_C1.data(x,y) != set_value_32f_C1) return EXIT_FAILURE; if( *im_cpu_32f_C2.data(x,y) != set_value_32f_C2) return EXIT_FAILURE; if( *im_cpu_32f_C3.data(x,y) != set_value_32f_C3) return EXIT_FAILURE; if( *im_cpu_32f_C4.data(x,y) != set_value_32f_C4) return EXIT_FAILURE; } } } std::cout << std::endl; std::cout << "**************************************************************************" << std::endl; std::cout << "* Everything seem to be ok. -- All assertions passed. *" << std::endl; std::cout << "* Look at the images and close the windows to derminate the unittests. 
*" << std::endl; std::cout << "**************************************************************************" << std::endl; std::cout << std::endl; return EXIT_SUCCESS; }
// Narrows the four double components of a cv::Scalar to float and packs
// them into a CUDA float4 in the same order.
static float4 scalarToCudaType(const cv::Scalar& in)
{
    const float c0 = static_cast<float>(in[0]);
    const float c1 = static_cast<float>(in[1]);
    const float c2 = static_cast<float>(in[2]);
    const float c3 = static_cast<float>(in[3]);
    return make_float4(c0, c1, c2, c3);
}
/* Tree rotation: recursively tries to lower the surface area of inner nodes
 * by swapping one child of `node` with a grandchild under the other child.
 *
 * node      - subtree root to optimize; leaves are left untouched.
 * max_depth - remaining recursion budget; nothing is done once it drops
 *             below zero.
 *
 * Children are processed first, then the single best swap (largest reduction
 * of the affected child's half area) is applied, if any swap improves it. */
void BVHBuild::rotate(BVHNode *node, int max_depth)
{
	/* nothing to rotate if we reached a leaf node. */
	if(node->is_leaf() || max_depth < 0)
		return;

	InnerNode *parent = (InnerNode*)node;

	/* rotate all children first */
	for(size_t c = 0; c < 2; c++)
		rotate(parent->children[c], max_depth-1);

	/* compute current area of all children */
	BoundBox bounds0 = parent->children[0]->m_bounds;
	BoundBox bounds1 = parent->children[1]->m_bounds;

	float area0 = bounds0.half_area();
	float area1 = bounds1.half_area();
	float4 child_area = make_float4(area0, area1, 0.0f, 0.0f);

	/* find best rotation. we pick a target child of a first child, and swap
	 * this with an other child. we perform the best such swap. */
	float best_cost = FLT_MAX;
	int best_child = -1, best_target = -1, best_other = -1;

	for(size_t c = 0; c < 2; c++) {
		/* ignore leaf nodes as we cannot descent into */
		if(parent->children[c]->is_leaf())
			continue;

		InnerNode *child = (InnerNode*)parent->children[c];
		BoundBox& other = (c == 0)? bounds1: bounds0;

		/* transpose child bounds */
		BoundBox target0 = child->children[0]->m_bounds;
		BoundBox target1 = child->children[1]->m_bounds;

		/* compute cost for both possible swaps:
		 * cost0 - `other` replaces grandchild 0 (child then holds other + target1)
		 * cost1 - `other` replaces grandchild 1 (child then holds target0 + other) */
		float cost0 = merge(other, target1).half_area() - child_area[c];
		float cost1 = merge(target0, other).half_area() - child_area[c];

		if(min(cost0,cost1) < best_cost) {
			best_child = (int)c;
			best_other = (int)(1-c);

			if(cost0 < cost1) {
				best_cost = cost0;
				best_target = 0;
			}
			else {
				/* record the cost of the swap actually selected; storing cost0
				 * here would overstate it and could reject or misrank a
				 * beneficial rotation. */
				best_cost = cost1;
				best_target = 1;
			}
		}
	}

	/* if we did not find a swap that improves the SAH then do nothing */
	if(best_cost >= 0)
		return;

	assert(best_child == 0 || best_child == 1);
	assert(best_target != -1);

	/* perform the best found tree rotation */
	InnerNode *child = (InnerNode*)parent->children[best_child];

	swap(parent->children[best_other], child->children[best_target]);
	child->m_bounds = merge(child->children[0]->m_bounds, child->children[1]->m_bounds);
}
void Projector::drawFrustum() { static const float4 akPoints[24] = { make_float4(-1.0f, -1.0f, -1.0f, 1.0f), make_float4(+1.0f, -1.0f, -1.0f, 1.0f), make_float4(-1.0f, -1.0f, -1.0f, 1.0f), make_float4(-1.0f, +1.0f, -1.0f, 1.0f), make_float4(+1.0f, -1.0f, -1.0f, 1.0f), make_float4(+1.0f, +1.0f, -1.0f, 1.0f), make_float4(-1.0f, +1.0f, -1.0f, 1.0f), make_float4(+1.0f, +1.0f, -1.0f, 1.0f), make_float4(-1.0f, -1.0f, +1.0f, 1.0f), make_float4(+1.0f, -1.0f, +1.0f, 1.0f), make_float4(-1.0f, -1.0f, +1.0f, 1.0f), make_float4(-1.0f, +1.0f, +1.0f, 1.0f), make_float4(+1.0f, -1.0f, +1.0f, 1.0f), make_float4(+1.0f, +1.0f, +1.0f, 1.0f), make_float4(-1.0f, +1.0f, +1.0f, 1.0f), make_float4(+1.0f, +1.0f, +1.0f, 1.0f), make_float4(-1.0f, -1.0f, -1.0f, 1.0f), make_float4(-1.0f, -1.0f, +1.0f, 1.0f), make_float4(+1.0f, -1.0f, -1.0f, 1.0f), make_float4(+1.0f, -1.0f, +1.0f, 1.0f), make_float4(-1.0f, +1.0f, -1.0f, 1.0f), make_float4(-1.0f, +1.0f, +1.0f, 1.0f), make_float4(+1.0f, +1.0f, -1.0f, 1.0f), make_float4(+1.0f, +1.0f, +1.0f, 1.0f) }; float16 kInvViewPrj = inverse((m_kModelViewMatrix * m_kProjectionMatrix)); glPushAttrib(GL_LIGHTING_BIT); glPushAttrib(GL_CURRENT_BIT); glDisable(GL_DEPTH_TEST); glPushMatrix(); { glPointSize(10.0f); glColor3f(1.0f, 1.0f, 0.0f); glBegin(GL_POINTS); for(uint i = 0; i < 4; i ++) { float4 kP = m_akCorners[i]; kP /= kP.w; glVertex3f(kP.x, kP.y, kP.z); } glEnd(); glBegin(GL_LINES); for(uint i = 0; i < 4; i ++) { uint u = i; uint v = (i >= 3) ? 0 : (i + 1); float4 kP0 = m_akCorners[u]; float4 kP1 = m_akCorners[v]; kP0 /= kP0.w; kP1 /= kP1.w; glVertex3f(kP0.x, kP0.y, kP0.z); glVertex3f(kP1.x, kP1.y, kP1.z); } glEnd(); glColor3f(1.0f, 0.0f, 0.0f); glBegin(GL_LINES); for(uint i = 0; i < 24; i ++) { float4 kP = kInvViewPrj * akPoints[i]; kP /= kP.w; glVertex3f(kP.x, kP.y, kP.z); } glEnd(); } glPopMatrix(); glEnable(GL_DEPTH_TEST); glPopAttrib(); glPopAttrib(); }
// main entry point int main( int argc, char** argv ) { printf("imagenet-console\n args (%i): ", argc); for( int i=0; i < argc; i++ ) printf("%i [%s] ", i, argv[i]); printf("\n\n"); // retrieve filename argument if( argc < 2 ) { printf("imagenet-console: input image filename required\n"); return 0; } const char* imgFilename = argv[1]; // create imageNet imageNet* net = imageNet::Create(argc, argv); if( !net ) { printf("imagenet-console: failed to initialize imageNet\n"); return 0; } net->EnableProfiler(); // load image from file on disk float* imgCPU = NULL; float* imgCUDA = NULL; int imgWidth = 0; int imgHeight = 0; if( !loadImageRGBA(imgFilename, (float4**)&imgCPU, (float4**)&imgCUDA, &imgWidth, &imgHeight) ) { printf("failed to load image '%s'\n", imgFilename); return 0; } float confidence = 0.0f; // classify image const int img_class = net->Classify(imgCUDA, imgWidth, imgHeight, &confidence); if( img_class >= 0 ) { printf("imagenet-console: '%s' -> %2.5f%% class #%i (%s)\n", imgFilename, confidence * 100.0f, img_class, net->GetClassDesc(img_class)); if( argc > 2 ) { const char* outputFilename = argv[2]; // overlay the classification on the image cudaFont* font = cudaFont::Create(); if( font != NULL ) { char str[512]; sprintf(str, "%2.3f%% %s", confidence * 100.0f, net->GetClassDesc(img_class)); const int overlay_x = 10; const int overlay_y = 10; const int px_offset = overlay_y * imgWidth * 4 + overlay_x * 4; // if the image has a white background, use black text (otherwise, white) const float white_cutoff = 225.0f; bool white_background = false; if( imgCPU[px_offset] > white_cutoff && imgCPU[px_offset + 1] > white_cutoff && imgCPU[px_offset + 2] > white_cutoff ) white_background = true; // overlay the text on the image font->RenderOverlay((float4*)imgCUDA, (float4*)imgCUDA, imgWidth, imgHeight, (const char*)str, 10, 10, white_background ? 
make_float4(0.0f, 0.0f, 0.0f, 255.0f) : make_float4(255.0f, 255.0f, 255.0f, 255.0f)); } printf("imagenet-console: attempting to save output image to '%s'\n", outputFilename); if( !saveImageRGBA(outputFilename, (float4*)imgCPU, imgWidth, imgHeight) ) printf("imagenet-console: failed to save output image to '%s'\n", outputFilename); else printf("imagenet-console: completed saving '%s'\n", outputFilename); } } else printf("imagenet-console: failed to classify '%s' (result=%i)\n", imgFilename, img_class); printf("\nshutting down...\n"); CUDA(cudaFreeHost(imgCPU)); delete net; return 0; }
/* MikkTSpace callback: stores the tangent T (three components) together with
 * its sign for corner `vert` of face `face`. Tangents are stored four per
 * face, so the slot index is face*4 + vert. */
static void mikk_set_tangent_space(const SMikkTSpaceContext *context, const float T[], const float sign, const int face, const int vert)
{
	MikkUserData *userdata = (MikkUserData*)context->m_pUserData;
	const int corner = face*4 + vert;

	userdata->tangent[corner] = make_float4(T[0], T[1], T[2], sign);
}
// Projects the given intersection points with rkViewPrj, remaps them using
// the current GL viewport and returns the 2D bounding range of the result
// as (min.x, min.y, max.x, max.y).
//
// uiIntersectionCount - number of valid entries in akIntersections; the
//                       local scratch array holds 32 entries, so callers
//                       must not pass more.
// akIntersections     - points to project.
// rkViewPrj           - matrix applied to every point before the divide by w.
float4 Projector::findProjectedRange(
    uint uiIntersectionCount,
    float4 akIntersections[32],
    const float16 &rkViewPrj)
{
    GLint aiViewport[4];
    glGetIntegerv(GL_VIEWPORT, aiViewport);

    // Stage 1: project every intersection.
    float4 akProjected[32];
    uint uiProjectedCount = 0;

    for( uint uiPoint = 0; uiPoint < uiIntersectionCount; uiPoint++)
    {
        float4 kClip = rkViewPrj * akIntersections[uiPoint];
        kClip /= kClip.w;

        float fX = kClip.x;
        float fY = kClip.y;
        float fZ = kClip.z;

        // the remap is evaluated in double precision and then narrowed to float
        float4 kMapped;
        kMapped.x = (2.0 * (fX - aiViewport[0]) / aiViewport[2]) - 1.0;
        kMapped.y = (2.0 * (fY - aiViewport[1]) / aiViewport[3]) - 1.0;
        kMapped.z = 2.0 * fZ - 1.0;
        kMapped.w = 1.0;

        akProjected[uiProjectedCount] = kMapped;
        uiProjectedCount++;
    }

    // Stage 2: bounding range of the projected points.
    float2 kMin;
    float2 kMax;

    if(uiProjectedCount == 0)
    {
        // nothing projected: fall back to the full range
        kMin = make_float2(-1.0f, -1.0f);
        kMax = make_float2(+1.0f, +1.0f);
    }
    else
    {
        // start from an inverted range so the first point initialises it
        kMin = make_float2(+1.0f, +1.0f);
        kMax = make_float2(-1.0f, -1.0f);

        for( uint uiPoint = 0; uiPoint < uiProjectedCount; uiPoint++)
        {
            const float4 &rkPoint = akProjected[uiPoint];

            if(rkPoint.x < kMin.x) kMin.x = rkPoint.x;
            if(rkPoint.y < kMin.y) kMin.y = rkPoint.y;
            if(rkPoint.x > kMax.x) kMax.x = rkPoint.x;
            if(rkPoint.y > kMax.y) kMax.y = rkPoint.y;
        }

        // small safety margin around the points
        kMin.x -= 0.01f;
        kMin.y -= 0.01f;
        kMax.x += 0.01f;
        kMax.y += 0.01f;
    }

    // Stage 3: limit the range.
    if(kMin.x < -1.0f) kMin.x = -1.0f;
    if(kMin.y < -1.0f) kMin.y = -1.0f;

    // NOTE(review): this raises the maximum to at least +1 instead of capping
    // it at +1. Kept as-is because callers may rely on it; confirm intent.
    if(kMax.x < +1.0f) kMax.x = +1.0f;
    if(kMax.y < +1.0f) kMax.y = +1.0f;

    // degenerate (zero-width) ranges fall back to the full range
    if(fabs(kMin.x - kMax.x) < 0.000001)
    {
        kMin.x = -1.0;
        kMax.x = +1.0;
    }

    if(fabs(kMin.y - kMax.y) < 0.000001)
    {
        kMin.y = -1.0;
        kMax.y = +1.0;
    }

    return make_float4(kMin.x, kMin.y, kMax.x, kMax.y);
}
bool BiotSavartSolver::m_ParticleToMesh() { m_SpatialHasher_vort.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx, make_float3(m_origin.x,m_origin.y,m_origin.z), m_N_vort); m_SpatialHasher_vort.setHashParam(); m_SpatialHasher_vort.doSpatialHash(m_p_vortPos->getDevicePtr(),m_N_vort); m_p_vortPos_Reorder->memset(make_float4(0,0,0,0)); m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_p_vortPos->getDevicePtr()), (void*)(m_p_vortPos_Reorder->getDevicePtr()), 4, 1); for(int i=0;i<NUM_COMPONENTS;i++) { m_particle_vort_Reorder[i]->memset(0); m_SpatialHasher_vort.reorderData(m_N_vort, (void*)(m_particle_vort[i]->getDevicePtr()), (void*)(m_particle_vort_Reorder[i]->getDevicePtr()), 1, 2); } for (int c=0;c<NUM_COMPONENTS;c++) { m_grid_vort[c]->memset(0); ParticleToMesh(m_SpatialHasher_vort.getStartTable(), m_SpatialHasher_vort.getEndTable(), m_p_vortPos_Reorder->getDevicePtr(), m_particle_vort_Reorder[c]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x, m_grid_vort[c]->getDevicePtr(), make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), m_N_vort, m_origin); cudaMemcpy(m_grid_Rhs[c]->getDevicePtr(), m_grid_vort[c]->getDevicePtr(), m_grid_Rhs[c]->getSize()*m_grid_Rhs[c]->typeSize(), cudaMemcpyDeviceToDevice); ComputeRHS(m_grid_Rhs[c]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x*m_SpatialHasher_vort.getCellSize().x, -1.0, m_gridx*m_gridy*m_gridz); //m_p_vortPos_Reorder->copy(GpuArrayf4::DEVICE_TO_HOST); //m_particle_vort_Reorder[c]->copy(GpuArrayd::DEVICE_TO_HOST); //double total_weight = 0; //double total_mass = 0; //for(int i=0; i<m_N_vort; i++) //{ // double *host = m_particle_vort_Reorder[c]->getHostPtr(); // total_weight += fabs(host[i]); // total_mass += host[i]; //} //double cx=0, cy=0, cz=0; //for(int i=0; i<m_N_vort; i++) //{ // float4 *hpos = m_p_vortPos_Reorder->getHostPtr(); // double *hmass = m_particle_vort_Reorder[c]->getHostPtr(); // cx+=hpos[i].x*fabs(hmass[i]); // cy+=hpos[i].y*fabs(hmass[i]); // 
cz+=hpos[i].z*fabs(hmass[i]); // //printf("%f,%f,%f\n",cx,cy,cz); //} //cx=cx/total_weight; //cy=cy/total_weight; //cz=cz/total_weight; //m_center.x = cx; //m_center.y = cy; //m_center.z = cz; //m_total_vort[c] = total_mass; ////printf("%f,%f,%f,%f\n",cx,cy,cz,total_mass); //applyDirichlet(m_grid_Rhs[c]->getDevicePtr(), // make_double4(cx,cy,cz,0), // total_mass, // m_origin, // m_SpatialHasher_vort.getCellSize().x, // m_gridx, // m_gridy, // m_gridz); } return true; }
bool BiotSavartSolver::initializeSolver(uint gdx, uint gdy, uint gdz, bool isVIC, int K, uint M, uint N) { m_isVIC = isVIC; m_K = K; if(m_initialized) { if(gdx==m_gridx && gdy==m_gridy && gdz==m_gridz && M==m_M_eval && N==m_N_vort) { //zerofy memory m_evalPos->memset(make_float4(0,0,0,0)); m_evalPos_Reorder->memset(make_float4(0,0,0,0)); m_p_vortPos->memset(make_float4(0,0,0,0)); m_p_vortPos_Reorder->memset(make_float4(0,0,0,0)); for (int i=0;i<NUM_COMPONENTS;i++) { m_grid_Rhs[i]->memset(0); m_particle_vort[i]->memset(0); m_particle_vort_Reorder[i]->memset(0); m_grid_vort[i]->memset(0); m_grid_Psi[i]->memset(0); m_particle_U[i]->memset(0); m_particle_U_deorder[i]->memset(0); m_grid_U[i]->memset(0); if(!m_isVIC) m_far_U[i]->memset(0); } } else //just reinitialize everything { m_gridx = gdx; m_gridy = gdy; m_gridz = gdz; m_M_eval = M; m_N_vort = N; m_PoissonSolver.m_InitialSystem(m_gridx, m_gridy, m_gridz); if(!m_isVIC){ m_SpatialHasher_eval.endSpatialHash(); m_SpatialHasher_eval.initSpatialHash(m_M_eval, m_gridx,m_gridy,m_gridz); } m_SpatialHasher_vort.endSpatialHash(); m_SpatialHasher_vort.initSpatialHash(m_N_vort, m_gridx,m_gridy,m_gridz); m_evalPos->free(); m_evalPos->alloc(m_M_eval); m_evalPos->memset(make_float4(0,0,0,0)); m_evalPos_Reorder->free(); m_evalPos_Reorder->alloc(m_M_eval); m_evalPos_Reorder->memset(make_float4(0,0,0,0)); m_p_vortPos->free(); m_p_vortPos->alloc(m_N_vort); m_p_vortPos->memset(make_float4(0,0,0,0)); m_p_vortPos_Reorder->free(); m_p_vortPos_Reorder->alloc(m_N_vort); m_p_vortPos_Reorder->memset(make_float4(0,0,0,0)); for (int i=0;i<NUM_COMPONENTS;i++) { m_grid_Rhs[i]->free(); m_grid_Rhs[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_Rhs[i]->memset(0); m_particle_vort[i]->free(); m_particle_vort[i]->alloc(m_N_vort); m_particle_vort[i]->memset(0); m_particle_vort_Reorder[i]->free(); m_particle_vort_Reorder[i]->alloc(m_N_vort); m_particle_vort_Reorder[i]->memset(0); m_grid_vort[i]->free(); m_grid_vort[i]->alloc(m_gridx*m_gridy*m_gridz); 
m_grid_vort[i]->memset(0); m_grid_Psi[i]->free(); m_grid_Psi[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_Psi[i]->memset(0); m_particle_U[i]->free(); m_particle_U[i]->alloc(m_M_eval); m_particle_U[i]->memset(0); m_particle_U_deorder[i]->free(); m_particle_U_deorder[i]->alloc(m_M_eval); m_particle_U_deorder[i]->memset(0); m_grid_U[i]->free(); m_grid_U[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_U[i]->memset(0); if(!m_isVIC){ m_far_U[i]->free(); m_far_U[i]->alloc(m_gridx*m_gridy*m_gridz); m_far_U[i]->memset(0); } } } } else { m_gridx = gdx; m_gridy = gdy; m_gridz = gdz; m_M_eval = M; m_N_vort = N; m_PoissonSolver.m_InitialSystem(m_gridx, m_gridy, m_gridz); if(!m_isVIC){ m_SpatialHasher_eval.initSpatialHash(m_M_eval, m_gridx,m_gridy,m_gridz); } m_SpatialHasher_vort.initSpatialHash(m_N_vort, m_gridx,m_gridy,m_gridz); m_evalPos->alloc(m_M_eval); m_evalPos->memset(make_float4(0,0,0,0)); m_evalPos_Reorder->alloc(m_M_eval); m_evalPos_Reorder->memset(make_float4(0,0,0,0)); m_p_vortPos->alloc(m_N_vort); m_p_vortPos->memset(make_float4(0,0,0,0)); m_p_vortPos_Reorder->alloc(m_N_vort); m_p_vortPos_Reorder->memset(make_float4(0,0,0,0)); for (int i=0;i<NUM_COMPONENTS;i++) { m_grid_Rhs[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_Rhs[i]->memset(0); m_particle_vort[i]->alloc(m_N_vort); m_particle_vort[i]->memset(0); m_particle_vort_Reorder[i]->alloc(m_N_vort); m_particle_vort_Reorder[i]->memset(0); m_grid_vort[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_vort[i]->memset(0); m_grid_Psi[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_Psi[i]->memset(0); m_particle_U[i]->alloc(m_M_eval); m_particle_U[i]->memset(0); m_particle_U_deorder[i]->alloc(m_M_eval); m_particle_U_deorder[i]->memset(0); m_grid_U[i]->alloc(m_gridx*m_gridy*m_gridz); m_grid_U[i]->memset(0); if(!m_isVIC){ m_far_U[i]->alloc(m_gridx*m_gridy*m_gridz); m_far_U[i]->memset(0); } } m_initialized = true; } return true; }
bool BiotSavartSolver::evaluateVelocity( GpuArrayf4 *another_end, uint is_segment ) { //m_p_vortPos->copy(gf_GpuArray<float4>::DEVICE_TO_HOST); //setDomain(m_origin, m_p_vortPos->getHostPtr(),m_N_vort,m_L); ////printf("%f,%f,%f,%f,\n",m_origin.x,m_origin.y,m_origin.z,m_L); //m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx, // make_float3(m_origin.x,m_origin.y,m_origin.z), // m_M_eval); //m_SpatialHasher_eval.setHashParam(); //m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval); //m_SpatialHasher_eval.reorderData(m_M_eval,m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1); //m_ParticleToMesh(); //m_SolvePoisson(); //m_ComputeCurl(); //m_Intepolate(); //m_LocalCorrection(another_end); //m_unsortResult(); GpuArrayf4 *temp_pos=new GpuArrayf4; temp_pos->alloc(m_M_eval); temp_pos->memset(make_float4(0,0,0,0)); for(int i=0;i<NUM_COMPONENTS;i++) { m_particle_U[i]->memset(0); m_particle_U_deorder[i]->memset(0); } if(!m_isVIC){ m_SpatialHasher_eval.setSpatialHashGrid(m_gridx, m_L/(double)m_gridx, make_float3(m_origin.x,m_origin.y,m_origin.z), m_M_eval); m_SpatialHasher_eval.setHashParam(); m_SpatialHasher_eval.doSpatialHash(m_evalPos->getDevicePtr(),m_M_eval); m_SpatialHasher_eval.reorderData(m_M_eval, m_evalPos->getDevicePtr(),m_evalPos_Reorder->getDevicePtr(),4,1); if(is_segment==1) { m_SpatialHasher_eval.reorderData(m_M_eval,another_end->getDevicePtr(),temp_pos->getDevicePtr(),4,1); } BiotSavartInterpolateFarField(m_evalPos_Reorder->getDevicePtr(), m_far_U[0]->getDevicePtr(),m_far_U[1]->getDevicePtr(), m_far_U[2]->getDevicePtr(), m_particle_U_deorder[0]->getDevicePtr(), m_particle_U_deorder[1]->getDevicePtr(),m_particle_U_deorder[2]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x, m_gridx,m_gridy,m_gridz, m_M_eval, m_origin); BiotSavartPPCorrScaleMN(m_SpatialHasher_vort.getStartTable(), m_SpatialHasher_vort.getEndTable(), m_evalPos_Reorder->getDevicePtr(), temp_pos->getDevicePtr(), is_segment, 
m_p_vortPos_Reorder->getDevicePtr(), m_particle_vort_Reorder[0]->getDevicePtr(), m_particle_vort_Reorder[1]->getDevicePtr(), m_particle_vort_Reorder[2]->getDevicePtr(), m_particle_U_deorder[0]->getDevicePtr(), m_particle_U_deorder[1]->getDevicePtr(), m_particle_U_deorder[2]->getDevicePtr(), m_grid_U[0]->getDevicePtr(), m_grid_U[1]->getDevicePtr(), m_grid_U[2]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x, make_uint3(m_gridx,m_gridy,m_gridz), make_uint3(m_gridx,m_gridy,m_gridz), m_K, m_M_eval, m_N_vort, m_origin); for (int c=0;c<3;c++) { m_SpatialHasher_eval.deorderData(m_M_eval,m_particle_U_deorder[c]->getDevicePtr(),m_particle_U[c]->getDevicePtr(),1,2); } } else { BiotSavartInterpolateFarField(m_evalPos->getDevicePtr(), m_grid_U[0]->getDevicePtr(),m_grid_U[1]->getDevicePtr(), m_grid_U[2]->getDevicePtr(), m_particle_U[0]->getDevicePtr(), m_particle_U[1]->getDevicePtr(),m_particle_U[2]->getDevicePtr(), m_SpatialHasher_vort.getCellSize().x, m_gridx,m_gridy,m_gridz, m_M_eval, m_origin); } //BiotSavartComputeVelocityForOutParticle(m_evalPos->getDevicePtr(), // make_double3(m_total_vort[0],m_total_vort[1],m_total_vort[2]), // m_center, // m_SpatialHasher_vort.getWorldOrigin(), // make_float3(m_SpatialHasher_vort.getWorldOrigin().x+m_L, // m_SpatialHasher_vort.getWorldOrigin().y+m_L, // m_SpatialHasher_vort.getWorldOrigin().z+m_L), // m_particle_U[0]->getDevicePtr(), // m_particle_U[1]->getDevicePtr(), // m_particle_U[2]->getDevicePtr(), // m_M_eval); temp_pos->free(); return true; }
// Four-channel overload of _pixMake: packs x, y, z, w into a float4 pixel.
static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w)
{
    const float4 pixel = make_float4(x, y, z, w);
    return pixel;
}
void SSShadowMapDemo::renderPost() { Stopwatch sw( m_deviceData ); if(1) { sw.start(); ConstData cb; { XMVECTOR v; // cb.m_viewInv = XMMatrixInverse( &v, g_ViewTr ); // cb.m_projInv = XMMatrixInverse( &v, g_ProjectionTr ); cb.m_viewInv = XMMatrixInverse( &v, XMMatrixMultiply( g_ProjectionTr, g_ViewTr ) ); cb.m_width = g_wWidth; cb.m_height = g_wHeight; cb.m_shadowWeight = 0.6f/MAX_SHADOWS; } { // clear Buffer<int> normalBuffer; normalBuffer.m_srv = m_normalRT.m_srv; BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), BufferInfo( &normalBuffer, true ) }; Launcher launcher( m_deviceData, &m_clearKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( m_constBuffer, cb ); launcher.launch1D( g_wWidth*g_wHeight, 64 ); } ID3D11RenderTargetView* m_rtv; ID3D11DepthStencilView* m_dsv; DeviceDX11* dd = (DeviceDX11*)m_deviceData; dd->m_context->OMGetRenderTargets( 1, &m_rtv, &m_dsv ); for(int lightIdx=0; lightIdx<MAX_SHADOWS; lightIdx++) { XMMATRIX viewTr; XMMATRIX projTr; { // render light view dd->m_context->OMSetRenderTargets( 0, 0, m_depthBuffer[lightIdx].m_depthStencilView ); dd->m_context->ClearDepthStencilView( m_depthBuffer[lightIdx].m_depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 ); getMatrices<true>( m_lightPos[lightIdx], make_float4(0,0,0), make_float4(1,0,0,0), XM_PI*60.f/180.f, 1.f, 0.1f, 50.f, &viewTr, &projTr ); dispatchRenderList( g_deviceData, &g_debugRenderObj, &viewTr, &projTr ); } { // run compute shader for accumulation dd->m_context->OMSetRenderTargets( 0, 0, 0 ); { // cb.m_lightView = viewTr; // cb.m_lightProj = projTr; // == mul( mul( v, view ), proj ) in shader (Matrices are transposed) cb.m_lightView = XMMatrixMultiply( projTr, viewTr ); } Buffer<int> depthBuffer; depthBuffer.m_srv = g_depthStencil.m_srv; Buffer<int> shadowBuffer; shadowBuffer.m_srv = m_depthBuffer[lightIdx].m_srv; BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), BufferInfo( &depthBuffer, true ), BufferInfo( 
&shadowBuffer, true ) }; Launcher launcher( m_deviceData, &m_shadowAccmKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( m_constBuffer, cb ); launcher.launch2D( g_wWidth, g_wHeight, 8, 8 ); } } m_rtv->Release(); m_dsv->Release(); } else { ConstData cb; { XMVECTOR v; // cb.m_viewInv = XMMatrixInverse( &v, g_ViewTr ); // cb.m_projInv = XMMatrixInverse( &v, g_ProjectionTr ); cb.m_viewInv = XMMatrixInverse( &v, XMMatrixMultiply( g_ProjectionTr, g_ViewTr ) ); cb.m_width = g_wWidth; cb.m_height = g_wHeight; cb.m_shadowWeight = 0.6f/MAX_SHADOWS; } { // clear Buffer<int> normalBuffer; normalBuffer.m_srv = m_normalRT.m_srv; BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), BufferInfo( &normalBuffer, true ) }; Launcher launcher( m_deviceData, &m_clearKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( m_constBuffer, cb ); launcher.launch1D( g_wWidth*g_wHeight, 64 ); } ID3D11RenderTargetView* m_rtv; ID3D11DepthStencilView* m_dsv; DeviceDX11* dd = (DeviceDX11*)m_deviceData; dd->m_context->OMGetRenderTargets( 1, &m_rtv, &m_dsv ); XMMATRIX viewTr[MAX_SHADOWS]; XMMATRIX projTr[MAX_SHADOWS]; for(int lightIdx=0; lightIdx<MAX_SHADOWS; lightIdx++) { { // render light view dd->m_context->OMSetRenderTargets( 0, 0, m_depthBuffer[lightIdx].m_depthStencilView ); dd->m_context->ClearDepthStencilView( m_depthBuffer[lightIdx].m_depthStencilView, D3D11_CLEAR_DEPTH, 1.0f, 0 ); getMatrices<true>( m_lightPos[lightIdx], make_float4(0,0,0), make_float4(1,0,0,0), XM_PI*60.f/180.f, 1.f, 0.1f, 50.f, &viewTr[lightIdx], &projTr[lightIdx] ); dispatchRenderList( g_deviceData, &g_debugRenderObj, &viewTr[lightIdx], &projTr[lightIdx] ); } { // run compute shader for accumulation dd->m_context->OMSetRenderTargets( 0, 0, 0 ); cb.m_shadowIdx = lightIdx; Buffer<int> shadowBuffer; shadowBuffer.m_srv = m_depthBuffer[lightIdx].m_srv; BufferInfo bInfo[] = { BufferInfo( &m_lightMergedBuffer ), BufferInfo( 
&shadowBuffer, true ) }; Launcher launcher( m_deviceData, &m_copyShadowMapKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( m_constBuffer, cb ); launcher.launch1D( g_wWidth*g_wHeight, 64 ); } } { // resolve in a shader { cb.m_shadowIdx = MAX_SHADOWS; for(int i=0; i<MAX_SHADOWS; i++) { viewTr[i] = XMMatrixMultiply( projTr[i], viewTr[i] ); } m_lightMatrixBuffer.write( viewTr, MAX_SHADOWS ); DeviceUtils::waitForCompletion( m_deviceData ); } sw.start(); Buffer<int> depthBuffer; depthBuffer.m_srv = g_depthStencil.m_srv; Buffer<int> shadowBuffer; shadowBuffer.m_srv = m_depthBuffer[0].m_srv; BufferInfo bInfo[] = { BufferInfo( &m_shadowAccumBuffer ), BufferInfo( &depthBuffer, true ), BufferInfo( &m_lightMergedBuffer, true ), BufferInfo( &m_lightMatrixBuffer, true ) }; Launcher launcher( m_deviceData, &m_shadowAccmAllKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( m_constBuffer, cb ); launcher.launch2D( g_wWidth, g_wHeight, 8, 8 ); } m_rtv->Release(); m_dsv->Release(); } sw.stop(); { float t = sw.getMs(); m_nTxtLines = 0; sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "%dlights, %3.2fms", MAX_SHADOWS, t); } resolve( &m_shadowAccumBuffer.m_srv ); // resolveTexture( (void**)&m_colorRT.m_srv ); }
static __inline void solveContact(Constraint4& cs, const float4& posA, float4& linVelA, float4& angVelA, float invMassA, const Matrix3x3& invInertiaA, const float4& posB, float4& linVelB, float4& angVelB, float invMassB, const Matrix3x3& invInertiaB, float maxRambdaDt[4], float minRambdaDt[4]) { float4 dLinVelA = make_float4(0.f); float4 dAngVelA = make_float4(0.f); float4 dLinVelB = make_float4(0.f); float4 dAngVelB = make_float4(0.f); for(int ic=0; ic<4; ic++) { // dont necessary because this makes change to 0 if( cs.m_jacCoeffInv[ic] == 0.f ) continue; { float4 angular0, angular1, linear; float4 r0 = cs.m_worldPos[ic] - posA; float4 r1 = cs.m_worldPos[ic] - posB; setLinearAndAngular( -cs.m_linear, r0, r1, linear, angular0, angular1 ); float rambdaDt = calcRelVel(cs.m_linear, -cs.m_linear, angular0, angular1, linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; rambdaDt *= cs.m_jacCoeffInv[ic]; { float prevSum = cs.m_appliedRambdaDt[ic]; float updated = prevSum; updated += rambdaDt; updated = max2( updated, minRambdaDt[ic] ); updated = min2( updated, maxRambdaDt[ic] ); rambdaDt = updated - prevSum; cs.m_appliedRambdaDt[ic] = updated; } float4 linImp0 = invMassA*linear*rambdaDt; float4 linImp1 = invMassB*(-linear)*rambdaDt; float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; #ifdef _WIN32 btAssert(_finite(linImp0.x)); btAssert(_finite(linImp1.x)); #endif if( JACOBI ) { dLinVelA += linImp0; dAngVelA += angImp0; dLinVelB += linImp1; dAngVelB += angImp1; } else { linVelA += linImp0; angVelA += angImp0; linVelB += linImp1; angVelB += angImp1; } } } if( JACOBI ) { linVelA += dLinVelA; angVelA += dAngVelA; linVelB += dLinVelB; angVelB += dAngVelB; } }
int volumetric_knt_cuda(int argc, char **argv) { Timer timer; int vol_size = vx_count * vx_size; float half_vol_size = vol_size * 0.5f; Eigen::Vector3i voxel_size(vx_size, vx_size, vx_size); Eigen::Vector3i volume_size(vol_size, vol_size, vol_size); Eigen::Vector3i voxel_count(vx_count, vx_count, vx_count); int total_voxels = voxel_count.x() * voxel_count.y() * voxel_count.z(); std::cout << std::fixed << "Voxel Count : " << voxel_count.transpose() << std::endl << "Voxel Size : " << voxel_size.transpose() << std::endl << "Volume Size : " << volume_size.transpose() << std::endl << "Total Voxels : " << total_voxels << std::endl << std::endl; timer.start(); KinectFrame knt(filepath); timer.print_interval("Importing knt frame : "); Eigen::Affine3f grid_affine = Eigen::Affine3f::Identity(); grid_affine.translate(Eigen::Vector3f(0, 0, half_vol_size)); grid_affine.scale(Eigen::Vector3f(1, 1, 1)); // z is negative inside of screen Eigen::Matrix4f grid_matrix = grid_affine.matrix(); float knt_near_plane = 0.1f; float knt_far_plane = 10240.0f; Eigen::Matrix4f projection = perspective_matrix<float>(KINECT_V2_FOVY, KINECT_V2_DEPTH_ASPECT_RATIO, knt_near_plane, knt_far_plane); Eigen::Matrix4f projection_inverse = projection.inverse(); Eigen::Matrix4f view_matrix = Eigen::Matrix4f::Identity(); std::vector<float4> vertices(knt.depth.size(), make_float4(0, 0, 0, 1)); std::vector<float4> normals(knt.depth.size(), make_float4(0, 0, 1, 1)); std::vector<Eigen::Vector2f> grid_voxels_params(total_voxels); // // setup image parameters // unsigned short image_width = KINECT_V2_DEPTH_WIDTH; unsigned short image_height = image_width / aspect_ratio; QImage img(image_width, image_height, QImage::Format::Format_RGBA8888); img.fill(Qt::GlobalColor::gray); uchar4* image_data = (uchar4*)img.bits(); //float4* debug_buffer = new float4[image_width * image_height]; //memset(debug_buffer, 0, image_width * image_height * sizeof(float4)); knt_cuda_setup( vx_count, vx_size, grid_matrix.data(), 
projection.data(), projection_inverse.data(), *grid_voxels_params.data()->data(), KINECT_V2_DEPTH_WIDTH, KINECT_V2_DEPTH_HEIGHT, KINECT_V2_DEPTH_MIN, KINECT_V2_DEPTH_MAX, vertices.data()[0], normals.data()[0], image_width, image_height ); timer.start(); knt_cuda_allocate(); knt_cuda_init_grid(); timer.print_interval("Allocating gpu : "); timer.start(); knt_cuda_copy_host_to_device(); knt_cuda_copy_depth_buffer_to_device(knt.depth.data()); timer.print_interval("Copy host to device : "); timer.start(); knt_cuda_normal_estimation(); timer.print_interval("Normal estimation : "); timer.start(); knt_cuda_update_grid(view_matrix.data()); timer.print_interval("Update grid : "); timer.start(); knt_cuda_grid_params_copy_device_to_host(); knt_cuda_copy_device_to_host(); timer.print_interval("Copy device to host : "); // // setup camera parameters // timer.start(); Eigen::Affine3f camera_to_world = Eigen::Affine3f::Identity(); float cam_z = -half_vol_size; camera_to_world.scale(Eigen::Vector3f(1, 1, -1)); camera_to_world.translate(Eigen::Vector3f(half_vol_size, half_vol_size, cam_z)); Eigen::Matrix4f camera_to_world_matrix = camera_to_world.matrix(); knt_cuda_raycast(KINECT_V2_FOVY, KINECT_V2_DEPTH_ASPECT_RATIO, camera_to_world_matrix.data()); timer.print_interval("Raycast : "); timer.start(); knt_cuda_copy_image_device_to_host(*(uchar4*)img.bits()); timer.print_interval("Copy Img to host : "); timer.start(); knt_cuda_free(); timer.print_interval("Cleanup gpu : "); #if 0 //memset(image_data, 0, image_width * image_height * sizeof(uchar4)); //memset(debug_buffer, 0, image_width * image_height * sizeof(float4)); Eigen::Vector3f camera_pos = camera_to_world_matrix.col(3).head<3>(); float fov_scale = (float)tan(DegToRad(KINECT_V2_FOVY * 0.5f)); float aspect_ratio = KINECT_V2_DEPTH_ASPECT_RATIO; // // for each pixel, trace a ray // timer.start(); for (int y = 0; y < image_height; ++y) { for (int x = 0; x < image_width; ++x) { // Convert from image space (in pixels) to screen space 
// Screen Space along X axis = [-aspect ratio, aspect ratio] // Screen Space along Y axis = [-1, 1] float x_norm = (2.f * float(x) + 0.5f) / (float)image_width; float y_norm = (2.f * float(y) + 0.5f) / (float)image_height; Eigen::Vector3f screen_coord( (x_norm - 1.f) * aspect_ratio * fov_scale, (1.f - y_norm) * fov_scale, 1.0f); Eigen::Vector3f direction; multDirMatrix(screen_coord, camera_to_world_matrix, direction); direction.normalize(); long voxels_zero_crossing[2] = { -1, -1 }; int hit_count = raycast_tsdf_volume<float>( camera_pos, direction, voxel_count.cast<int>(), voxel_size.cast<int>(), grid_voxels_params, voxels_zero_crossing); if (hit_count > 0) { if (hit_count == 2) { float4 n = normals[y * image_width + x]; //image_data[y * image_width + x].x = 0; //image_data[y * image_width + x].y = 128; //image_data[y * image_width + x].z = 128; //image_data[y * image_width + x].w = 255; image_data[y * image_width + x].x = uchar((n.x * 0.5f + 0.5f) * 255); image_data[y * image_width + x].y = uchar((n.y * 0.5f + 0.5f) * 255); image_data[y * image_width + x].z = uchar((n.z * 0.5f + 0.5f) * 255); image_data[y * image_width + x].w = 255; } else { image_data[y * image_width + x].x = 128; image_data[y * image_width + x].y = 128; image_data[y * image_width + x].z = 0; image_data[y * image_width + x].w = 255; } } else { image_data[y * image_width + x].x = 128; image_data[y * image_width + x].y = 0; image_data[y * image_width + x].z = 0; image_data[y * image_width + x].w = 255; } } } timer.print_interval("Raycasting to image : "); //export_debug_buffer("../../data/cpu_image_data_screen_coord_f4.txt", debug_buffer, image_width, image_height); //export_image_buffer("../../data/cpu_image_data_screen_coord_uc.txt", image_data, image_width, image_height); #else //export_debug_buffer("../../data/gpu_image_data_screen_coord_f4.txt", debug_buffer, image_width, image_height); //export_image_buffer("../../data/gpu_image_data_screen_coord_uc.txt", image_data, image_width, 
image_height); #endif QImage image(&image_data[0].x, image_width, image_height, QImage::Format_RGBA8888); //image.fill(Qt::GlobalColor::black); QApplication app(argc, argv); QImageWidget widget; widget.resize(KINECT_V2_DEPTH_WIDTH, KINECT_V2_DEPTH_HEIGHT); widget.setImage(image); widget.show(); return app.exec(); }