//TEST (PCL_GPU, DISABLED_bruteForceRadiusSeachGPU) TEST (PCL_GPU, bruteForceRadiusSeachGPU) { DataGenerator data; data.data_size = 871000; data.tests_num = 100; data.cube_size = 1024.f; data.max_radius = data.cube_size/15.f; data.shared_radius = data.cube_size/20.f; data.printParams(); //generate data(); // brute force radius search data.bruteForceSearch(); //prepare gpu cloud pcl::gpu::Octree::PointCloud cloud_device; cloud_device.upload(data.points); pcl::gpu::DeviceArray<int> results_device, buffer(cloud_device.size()); vector<int> results_host; vector<size_t> sizes; for(size_t i = 0; i < data.tests_num; ++i) { pcl::gpu::bruteForceRadiusSearchGPU(cloud_device, data.queries[i], data.radiuses[i], results_device, buffer); results_device.download(results_host); std::sort(results_host.begin(), results_host.end()); ASSERT_EQ ( (results_host == data.bfresutls[i]), true ); sizes.push_back(results_device.size()); } float avg_size = std::accumulate(sizes.begin(), sizes.end(), (size_t)0) * (1.f/sizes.size());; cout << "avg_result_size = " << avg_size << endl; ASSERT_GT(avg_size, 5); }
//TEST(PCL_OctreeGPU, DISABLED_batchRadiusSearch) TEST(PCL_OctreeGPU, batchRadiusSearch) { DataGenerator data; data.data_size = 871000; data.tests_num = 10000; data.cube_size = 1024.f; data.max_radius = data.cube_size/30.f; data.shared_radius = data.cube_size/30.f; data.printParams(); const int max_answers = 333; //generate data(); //prepare gpu cloud pcl::gpu::Octree::PointCloud cloud_device; cloud_device.upload(data.points); //gpu build pcl::gpu::Octree octree_device; octree_device.setCloud(cloud_device); octree_device.build(); //upload queries pcl::gpu::Octree::Queries queries_device; pcl::gpu::Octree::Radiuses radiuses_device; queries_device.upload(data.queries); radiuses_device.upload(data.radiuses); //prepare output buffers on device pcl::gpu::NeighborIndices result_device1(queries_device.size(), max_answers); pcl::gpu::NeighborIndices result_device2(queries_device.size(), max_answers); pcl::gpu::NeighborIndices result_device3(data.indices.size(), max_answers); //prepare output buffers on host vector< vector<int> > host_search1(data.tests_num); vector< vector<int> > host_search2(data.tests_num); for(size_t i = 0; i < data.tests_num; ++i) { host_search1[i].reserve(max_answers); host_search2[i].reserve(max_answers); } //search GPU shared octree_device.radiusSearch(queries_device, data.shared_radius, max_answers, result_device1); //search GPU individual octree_device.radiusSearch(queries_device, radiuses_device, max_answers, result_device2); //search GPU shared with indices pcl::gpu::Octree::Indices indices; indices.upload(data.indices); octree_device.radiusSearch(queries_device, indices, data.shared_radius, max_answers, result_device3); //search CPU octree_device.internalDownload(); for(size_t i = 0; i < data.tests_num; ++i) { octree_device.radiusSearchHost(data.queries[i], data.shared_radius, host_search1[i], max_answers); octree_device.radiusSearchHost(data.queries[i], data.radiuses[i], host_search2[i], max_answers); } //download results vector<int> sizes1; 
vector<int> sizes2; vector<int> sizes3; result_device1.sizes.download(sizes1); result_device2.sizes.download(sizes2); result_device3.sizes.download(sizes3); vector<int> downloaded_buffer1, downloaded_buffer2, downloaded_buffer3, results_batch; result_device1.data.download(downloaded_buffer1); result_device2.data.download(downloaded_buffer2); result_device3.data.download(downloaded_buffer3); //data.bruteForceSearch(); //verify results for(size_t i = 0; i < data.tests_num; ++i) { vector<int>& results_host = host_search1[i]; int beg = i * max_answers; int end = beg + sizes1[i]; results_batch.assign(downloaded_buffer1.begin() + beg, downloaded_buffer1.begin() + end); std::sort(results_batch.begin(), results_batch.end()); std::sort(results_host.begin(), results_host.end()); if ((int)results_batch.size() == max_answers && results_batch.size() < results_host.size() && max_answers) results_host.resize(max_answers); ASSERT_EQ ( ( results_batch == results_host ), true ); //vector<int>& results_bf = data.bfresutls[i]; //ASSERT_EQ ( ( results_bf == results_batch), true ); //ASSERT_EQ ( ( results_bf == results_host ), true ); } float avg_size1 = std::accumulate(sizes1.begin(), sizes1.end(), 0) * (1.f/sizes1.size()); cout << "avg_result_size1 = " << avg_size1 << endl; ASSERT_GT(avg_size1, 5); //verify results for(size_t i = 0; i < data.tests_num; ++i) { vector<int>& results_host = host_search2[i]; int beg = i * max_answers; int end = beg + sizes2[i]; results_batch.assign(downloaded_buffer2.begin() + beg, downloaded_buffer2.begin() + end); std::sort(results_batch.begin(), results_batch.end()); std::sort(results_host.begin(), results_host.end()); if ((int)results_batch.size() == max_answers && results_batch.size() < results_host.size() && max_answers) results_host.resize(max_answers); ASSERT_EQ ( ( results_batch == results_host ), true ); //vector<int>& results_bf = data.bfresutls[i]; //ASSERT_EQ ( ( results_bf == results_batch), true ); //ASSERT_EQ ( ( results_bf == results_host 
), true ); } float avg_size2 = std::accumulate(sizes2.begin(), sizes2.end(), 0) * (1.f/sizes2.size()); cout << "avg_result_size2 = " << avg_size2 << endl; ASSERT_GT(avg_size2, 5); //verify results for(size_t i = 0; i < data.tests_num; i+=2) { vector<int>& results_host = host_search1[i]; int beg = i/2 * max_answers; int end = beg + sizes3[i/2]; results_batch.assign(downloaded_buffer3.begin() + beg, downloaded_buffer3.begin() + end); std::sort(results_batch.begin(), results_batch.end()); std::sort(results_host.begin(), results_host.end()); if ((int)results_batch.size() == max_answers && results_batch.size() < results_host.size() && max_answers) results_host.resize(max_answers); ASSERT_EQ ( ( results_batch == results_host ), true ); //vector<int>& results_bf = data.bfresutls[i]; //ASSERT_EQ ( ( results_bf == results_batch), true ); //ASSERT_EQ ( ( results_bf == results_host ), true ); } float avg_size3 = std::accumulate(sizes3.begin(), sizes3.end(), 0) * (1.f/sizes3.size()); cout << "avg_result_size3 = " << avg_size3 << endl; ASSERT_GT(avg_size3, 5); }
//TEST(PCL_OctreeGPU, DISABLED_hostRadiusSearch)
/**
 * Checks the host-side radius search of a device-built pcl::gpu::Octree
 * (radiusSearchHost) against two references for every generated query:
 *   - pcl::octree::OctreePointCloudSearch built on the same points, and
 *   - data.bfresutls[i].
 *
 * The mean result size is printed at the end and must be greater than 5.
 *
 * NOTE(review): data.bfresutls is presumably filled by
 * data.bruteForceSearch(true) - confirm against DataGenerator.
 */
TEST(PCL_OctreeGPU, hostRadiusSearch)
{
    DataGenerator data;
    data.data_size = 871000;
    data.tests_num = 10000;
    data.cube_size = 1024.f;
    data.max_radius    = data.cube_size/15.f;
    data.shared_radius = data.cube_size/20.f;
    data.printParams();

    // generate the input
    data();

    // device copy of the cloud
    pcl::gpu::Octree::PointCloud cloud_device;
    cloud_device.upload(data.points);

    // host copy of the cloud as an unorganized (height == 1) pcl::PointCloud
    pcl::PointCloud<pcl::PointXYZ>::Ptr cloud_host(new pcl::PointCloud<pcl::PointXYZ>);
    cloud_host->width  = data.points.size();
    cloud_host->height = 1;
    cloud_host->points.resize (cloud_host->width * cloud_host->height);
    std::transform(data.points.begin(), data.points.end(), cloud_host->points.begin(), DataGenerator::ConvPoint<pcl::PointXYZ>());

    // octree built on the device
    pcl::gpu::Octree octree_device;
    octree_device.setCloud(cloud_device);
    octree_device.build();

    // octree built on the host
    float resolution = 25.f;
    cout << "[!]Octree resolution: " << resolution << endl;
    pcl::octree::OctreePointCloudSearch<pcl::PointXYZ> octree_host(resolution);
    octree_host.setInputCloud (cloud_host);
    octree_host.addPointsFromInputCloud ();

    // brute-force reference
    data.bruteForceSearch(true);

    vector<int> result_sizes;
    result_sizes.reserve(data.tests_num);

    octree_device.internalDownload();

    for(size_t q = 0; q < data.tests_num; ++q)
    {
        // host search on the octree that was built on the device
        vector<int> from_gpu_octree;
        octree_device.radiusSearchHost(data.queries[q], data.radiuses[q], from_gpu_octree);

        // host search on the PCL octree
        vector<float> dists;
        vector<int> from_pcl_octree;
        const pcl::PointXYZ query_xyz(data.queries[q].x, data.queries[q].y, data.queries[q].z);
        octree_host.radiusSearch(query_xyz, data.radiuses[q], from_pcl_octree, dists);

        // order of the returned indices is not compared, only the sets
        std::sort(from_gpu_octree.begin(), from_gpu_octree.end());
        std::sort(from_pcl_octree.begin(), from_pcl_octree.end());

        ASSERT_EQ ( (from_gpu_octree == from_pcl_octree ), true );
        ASSERT_EQ ( (from_gpu_octree == data.bfresutls[q]), true );

        result_sizes.push_back(from_pcl_octree.size());
    }

    const int total_found = std::accumulate(result_sizes.begin(), result_sizes.end(), 0);
    float avg_size = total_found * (1.f/result_sizes.size());
    cout << "avg_result_size = " << avg_size << endl;

    ASSERT_GT(avg_size, 5);
}
//TEST(PCL_OctreeGPU, DISABLED_approxNearesSearch) TEST(PCL_OctreeGPU, approxNearesSearch) { DataGenerator data; data.data_size = 871000; data.tests_num = 10000; data.cube_size = 1024.f; data.max_radius = data.cube_size/30.f; data.shared_radius = data.cube_size/30.f; data.printParams(); const float host_octree_resolution = 25.f; //generate data(); //prepare device cloud pcl::gpu::Octree::PointCloud cloud_device; cloud_device.upload(data.points); //prepare host cloud pcl::PointCloud<pcl::PointXYZ>::Ptr cloud_host(new pcl::PointCloud<pcl::PointXYZ>); cloud_host->width = data.points.size(); cloud_host->height = 1; cloud_host->points.resize (cloud_host->width * cloud_host->height); std::transform(data.points.begin(), data.points.end(), cloud_host->points.begin(), DataGenerator::ConvPoint<pcl::PointXYZ>()); //gpu build pcl::gpu::Octree octree_device; octree_device.setCloud(cloud_device); octree_device.build(); //build host octree pcl::octree::OctreePointCloudSearch<pcl::PointXYZ> octree_host(host_octree_resolution); octree_host.setInputCloud (cloud_host); octree_host.addPointsFromInputCloud(); //upload queries pcl::gpu::Octree::Queries queries_device; queries_device.upload(data.queries); //prepare output buffers on device pcl::gpu::NeighborIndices result_device(data.tests_num, 1); vector<int> result_host_pcl(data.tests_num); vector<int> result_host_gpu(data.tests_num); vector<float> dists_pcl(data.tests_num); vector<float> dists_gpu(data.tests_num); //search GPU shared octree_device.approxNearestSearch(queries_device, result_device); vector<int> downloaded; result_device.data.download(downloaded); for(size_t i = 0; i < data.tests_num; ++i) { octree_host.approxNearestSearch(data.queries[i], result_host_pcl[i], dists_pcl[i]); octree_device.approxNearestSearchHost(data.queries[i], result_host_gpu[i], dists_gpu[i]); } ASSERT_EQ ( ( downloaded == result_host_gpu ), true ); int count_gpu_better = 0; int count_pcl_better = 0; float diff_pcl_better = 0; for(size_t i = 0; i < 
data.tests_num; ++i) { float diff = dists_pcl[i] - dists_gpu[i]; bool gpu_better = diff > 0; ++(gpu_better ? count_gpu_better : count_pcl_better); if (!gpu_better) diff_pcl_better +=fabs(diff); } diff_pcl_better /=count_pcl_better; cout << "count_gpu_better: " << count_gpu_better << endl; cout << "count_pcl_better: " << count_pcl_better << endl; cout << "avg_diff_pcl_better: " << diff_pcl_better << endl; }