void TestVectorOfObjects::run(size_t count, size_t updates)
{
    PerfTimer perf;

    perf.start();
    std::vector<Particle> particles(count);
    perf.stop(&_creationTime);

    // randomize: no sense in this case...
    /*for (size_t i = 0; i < count / 2; ++i)
    {
        int a = rand() % count;
        int b = rand() % count;
        std::swap(particles[a], particles[b]);
    }*/

    _memoryKb = (particles.capacity() * sizeof(Particle)) / 1024.0;

    for (auto p = particles.begin(); p != particles.end(); ++p)
        p->generate();

    perf.start();
    for (size_t u = 0; u < updates; ++u)
    {
        for (auto p = particles.begin(); p != particles.end(); ++p)
            p->update(DELTA_TIME);
    }
    perf.stop(&_updatesTime);
}
void TestVectorOfPointers::run(size_t count, size_t updates)
{
    PerfTimer perf;

    perf.start();
    std::vector<std::shared_ptr<Particle>> particles(count);
    for (auto p = particles.begin(); p != particles.end(); ++p)
    {
        *p = std::make_shared<Particle>();
    }
    perf.stop(&_creationTime);

    // randomize to simulate a more realistic, fragmented allocation pattern
    for (size_t i = 0; i < count / 2; ++i)
    {
        int a = rand() % count;
        int b = rand() % count;
        if (a != b)
            std::swap(particles[a], particles[b]);
    }

    /*for (int i = 0; i < 10; ++i)
    {
        std::cout << (unsigned long)particles[i].get() << std::endl;
    }*/

    // count both the pointer array and the heap-allocated particles
    _memoryKb = (particles.capacity() * sizeof(std::shared_ptr<Particle>)
                 + count * sizeof(Particle)) / 1024.0;

    for (auto p = particles.begin(); p != particles.end(); ++p)
        (*p)->generate();

    perf.start();
    for (size_t u = 0; u < updates; ++u)
    {
        for (auto p = particles.begin(); p != particles.end(); ++p)
            (*p)->update(DELTA_TIME);
    }
    perf.stop(&_updatesTime);
}
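// The PerfTimer used by the two benchmarks above is not part of this listing.
// As an illustration only (the real class may differ), a minimal std::chrono
// based sketch matching the call sites -- start(), then stop(&out) -- could
// look like this; the unit written into *out (milliseconds here) is an
// assumption, not something the original code states.
#include <chrono>

class PerfTimerSketch
{
public:
    void start() { _begin = std::chrono::steady_clock::now(); }

    // Writes the time elapsed since start() into *out (milliseconds assumed).
    void stop(double *out)
    {
        const auto end = std::chrono::steady_clock::now();
        *out = std::chrono::duration<double, std::milli>(end - _begin).count();
    }

private:
    std::chrono::steady_clock::time_point _begin{};
};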
// loading/unloading
// ----------------------------------------------------------------------------------------------

// Loads a file and turns it into a runtime resource
bool ResourceManager::LoadResource(Resource * res, const WCHAR* filename)
{
    bool ok = false;

    // try to create resource
    if (!FileUtils::Exists(filename))
    {
        Logger::Log(OutputMessageType::Error, L"Failed to load file, '%ls' -- does not exist\n", filename);
        return false;
    }

    PerfTimer timer;
    timer.Start();

    // get factory associated with 'filename'
    ResourceFactory * factory = GetFactory(filename);
    if (!factory)
    {
        return false;
    }

    ok = factory->LoadResource(res, filename);
    if (ok)
    {
        res->SetReady();
        timer.Stop();
        Logger::Log(OutputMessageType::Debug, L"%d ms Loaded %ls\n",
            timer.ElapsedMilliseconds(), FileUtils::Name(filename));
    }
    else
    {
        timer.Stop();
        Logger::Log(OutputMessageType::Error, L"%d ms failed to load %ls\n",
            timer.ElapsedMilliseconds(), filename);
    }
    return ok;
}
CLerror CLElectrosFunctor<T>::LoadKernels(size_t deviceID)
{
    PerfTimer timer;
    timer.start();

    FunctorData &data = m_functors[deviceID];

    cout<<" Reading kernel source"<<endl;
    using std::ifstream;
    ifstream reader("Electrostatics.cl.c", ifstream::in);
    if (!reader.good())
    {
        cout<<"Cannot open program source"<<endl;
        return -1;
    }
    reader.seekg(0, std::ios::end);
    size_t length = reader.tellg();
    reader.seekg(0, std::ios::beg);
    // Null-terminate the source: clCreateProgramWithSource is called below with
    // a NULL lengths array, so every string must be zero-terminated.
    char *source = new char[length + 1];
    reader.read(source, length);
    source[length] = '\0';
    reader.close();

    /*
     * Different devices require different work group sizes to operate
     * optimally. The amount of __local memory in some kernels depends on these
     * work-group sizes. This causes a problem, as explained below.
     * There are two ways to use group-local memory:
     * 1) Allocate it as a parameter with clSetKernelArg()
     * 2) Declare it as a constant-size __local array within the CL kernel
     * Option (1) has the advantage of flexibility, but the extra indexing
     * overhead is a performance killer (20-25% easily lost on NVIDIA GPUs).
     * Option (2) has the advantage that the compiler knows the arrays are of
     * constant size, and is free to do extreme optimizations.
     * Of course, then both host and kernel have to agree on the size of the
     * work group.
     * We abuse the fact that the source code is compiled at runtime: we decide
     * those sizes in the host code, then #define them in the kernel code
     * before it is compiled.
     */
    // BLOCK size
    data.local = {BLOCK_X, 1, 1};
    size_t local_MT[3] = {BLOCK_X_MT, BLOCK_Y_MT, 1};
    // GRID size
    data.global = {((this->m_nLines + BLOCK_X - 1) / BLOCK_X) * BLOCK_X, 1, 1};
    data.global[0] /= data.vecWidth;
    data.local[0] /= data.vecWidth;

    cout<<"Local : "<<data.local[0]<<" "<<data.local[1]<<" "<<data.local[2]<<endl;
    cout<<"Local_MT: "<<local_MT[0]<<" "<<local_MT[1]<<" "<<local_MT[2]<<endl;
    cout<<"Global : "<<data.global[0]<<" "<<data.global[1]<<" "<<data.global[2]<<endl;

    char defines[1024];
    const size_t kernelSteps = this->m_pFieldLinesData->GetSize() / this->m_nLines;
    snprintf(defines, sizeof(defines),
             "#define BLOCK_X %u\n"
             "#define BLOCK_X_MT %u\n"
             "#define BLOCK_Y_MT %u\n"
             "#define KERNEL_STEPS %u\n"
             "#define Tprec %s\n"
             "#define Tvec %s\n",
             (unsigned int) data.local[0],
             (unsigned int) local_MT[0], (unsigned int) local_MT[1],
             (unsigned int) kernelSteps,
             FindPrecType(), FindVecType(data.vecWidth));
    cout<<" Calc'ed kern steps "<<kernelSteps<<endl;

    char *srcs[2] = {defines, source};
    CLerror err;
    cl_program prog = clCreateProgramWithSource(data.context, 2, (const char**) srcs, NULL, &err);
    if (err) cout<<"clCreateProgramWithSource returns: "<<err<<endl;
    delete[] source;

    char options[] = "-cl-fast-relaxed-math";
    err = clBuildProgram(prog, 0, NULL, options, NULL, NULL);
    if (err) cout<<"clBuildProgram returns: "<<err<<endl;

    size_t logSize;
    clGetProgramBuildInfo(prog, data.device->deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
    char *log = (char*) malloc(logSize);
    clGetProgramBuildInfo(prog, data.device->deviceID, CL_PROGRAM_BUILD_LOG, logSize, log, 0);
    cout<<"Program Build Log:"<<endl<<log<<endl;
    free(log);
    CL_ASSERTE(err, "clBuildProgram failed");

    data.perfData.add(TimingInfo("Program compilation", timer.tick()));

    //==========================================================================
    cout<<" Preparing kernel"<<endl;
    data.kernel = clCreateKernel(prog, "CalcField_curvature", &err);
    CL_ASSERTE(err, "clCreateKernel");

    return CL_SUCCESS;
}
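/*
 * Illustration only: a hypothetical fragment showing how a kernel can consume
 * the host-generated "defines" block above. Because BLOCK_X, KERNEL_STEPS,
 * Tprec and Tvec are #define'd before clBuildProgram runs, the kernel can use
 * constant-size __local arrays (option 2 in the comment above) with no
 * clSetKernelArg indirection. This is not the actual Electrostatics.cl.c source.
 */
__kernel void CalcField_sketch(__global Tvec *lines)
{
    // The compiler sees a compile-time constant size here.
    __local Tvec lineCache[BLOCK_X];

    const size_t lid = get_local_id(0);
    lineCache[lid] = lines[get_global_id(0)];
    barrier(CLK_LOCAL_MEM_FENCE);

    for (unsigned int step = 0; step < KERNEL_STEPS; ++step)
    {
        /* per-step field computation would go here */
    }
}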
unsigned long CLElectrosFunctor<T>::MainFunctor(
    size_t functorIndex,    ///< Functor whose data to process
    size_t deviceIndex      ///< Device on which to process data
)
{
    if (functorIndex != deviceIndex)
        cerr<<"WARNING: Different functor and device"<<endl;

    PerfTimer timer;
    FunctorData &funData = m_functors[functorIndex];
    FunctorData &devData = m_functors[deviceIndex];
    perfPacket &profiler = devData.perfData;
    timer.start();

    CLerror err;
    cl_context ctx = devData.context;

    cout<<" Preparing buffers"<<endl;
    Vector3<cl_mem> &arrdata = devData.devFieldMem;
    cl_mem &charges = devData.chargeMem;
    cl_kernel &kernel = devData.kernel;

    err = CL_SUCCESS;
    // __global float *x,
    err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &arrdata.x);
    // __global float *y,
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &arrdata.y);
    // __global float *z,
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &arrdata.z);
    // __global pointCharge *Charges,
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &charges);
    // const unsigned int linePitch,
    cl_uint param = this->m_nLines;
    err |= clSetKernelArg(kernel, 4, sizeof(param), &param);
    // const unsigned int p,
    param = (cl_uint) this->m_pPointChargeData->GetSize();
    err |= clSetKernelArg(kernel, 5, sizeof(param), &param);
    // const unsigned int fieldIndex,
    param = 1;
    err |= clSetKernelArg(kernel, 6, sizeof(param), &param);
    // const float resolution
    T res = this->m_resolution;
    err |= clSetKernelArg(kernel, 7, sizeof(res), &res);
    if (err) cout<<"clSetKernelArg accumulates: "<<err<<endl;

    //==========================================================================
    cl_command_queue queue = clCreateCommandQueue(ctx, devData.device->deviceID, 0, &err);
    if (err) cout<<"clCreateCommandQueue returns: "<<err<<endl;

    timer.tick();
    Vector3<T*> hostArr = this->m_pFieldLinesData->GetDataPointers();
    const size_t start = funData.startIndex;
    const size_t size = funData.elements * sizeof(T) * funData.steps;

    err = CL_SUCCESS;
    err |= clEnqueueWriteBuffer(queue, arrdata.x, CL_FALSE, 0, size, &hostArr.x[start], 0, NULL, NULL);
    if (err) cout<<"Write 1 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.y, CL_FALSE, 0, size, &hostArr.y[start], 0, NULL, NULL);
    if (err) cout<<"Write 2 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.z, CL_FALSE, 0, size, &hostArr.z[start], 0, NULL, NULL);
    if (err) cout<<"Write 3 returns: "<<err<<endl;
    const size_t qSize = this->m_pPointChargeData->GetSizeBytes();
    err |= clEnqueueWriteBuffer(queue, charges, CL_FALSE, 0, qSize,
                                this->m_pPointChargeData->GetDataPointer(), 0, NULL, NULL);
    if (err) cout<<"Write 4 returns: "<<err<<endl;
    CL_ASSERTE(err, "Sending data to device failed");

    // Finish memory copies before starting the kernel
    CL_ASSERTE(clFinish(queue), "Pre-kernel sync");
    profiler.add(TimingInfo("Host to device transfer", timer.tick(), 3 * size + qSize));

    //==========================================================================
    cout<<" Executing kernel"<<endl;
    timer.tick();
    err |= clEnqueueNDRangeKernel(queue, kernel, 3, NULL, funData.global, funData.local, 0, NULL, NULL);
    if (err) cout<<"clEnqueueNDRangeKernel returns: "<<err<<endl;
    // Let kernel finish before continuing
    CL_ASSERTE(clFinish(queue), "Post-kernel sync");

    double time = timer.tick();
    this->m_pPerfData->time = time;
    this->m_pPerfData->performance =
        (this->m_nLines
            * ((2500 - 1) * (this->m_pPointChargeData->GetSize() * (electroPartFieldFLOP + 3) + 13))
            / time) / 1E9;
    profiler.add(TimingInfo("Kernel execution time", time));

    //==========================================================================
    cout<<" Recovering results"<<endl;
    timer.tick();
    err = CL_SUCCESS;
    err |= clEnqueueReadBuffer(queue, arrdata.x, CL_FALSE, 0, size, hostArr.x, 0, NULL, NULL);
    if (err) cout<<" Read 1 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer(queue, arrdata.y, CL_FALSE, 0, size, hostArr.y, 0, NULL, NULL);
    if (err) cout<<" Read 2 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer(queue, arrdata.z, CL_FALSE, 0, size, hostArr.z, 0, NULL, NULL);
    if (err) cout<<" Read 3 returns: "<<err<<endl;
    if (err) cout<<"clEnqueueReadBuffer accumulates: "<<err<<endl;
    clFinish(queue);
    profiler.add(TimingInfo("Device to host transfer", timer.tick(), 3 * size));

    return CL_SUCCESS;
}
bool RagGraphPlanner::findLocalTrajectory(const Controller::State &cbegin, GenWorkspaceChainState::Seq::const_iterator wbegin, GenWorkspaceChainState::Seq::const_iterator wend, Controller::Trajectory &trajectory, Controller::Trajectory::iterator iter, MSecTmU32 timeOut)
{
    CriticalSectionWrapper csw(csCommand);
#ifdef _HBGRAPHPLANNER_PERFMON
    PerfTimer t;
#ifdef _HBHEURISTIC_PERFMON
    HBHeuristic::resetLog();
    HBCollision::resetLog();
#endif
#endif
    HBHeuristic *heuristic = getHBHeuristic();
    if (heuristic/* && heuristic->enableUnc*/)
        enableHandPlanning();

    // context.write("findLocalTrajectory(): %s\n", hbplannerDebug(*this).c_str());
    //context.write("RagGraphPlanner::findLocalTrajectory: %s\n", hbplannerConfigspaceDebug(*this).c_str());
    //context.write("RagGraphPlanner::findLocalTrajectory: %s\n", hbplannerWorkspaceDebug(*this).c_str());

    // trajectory size
    const size_t size = 1 + (size_t)(wend - wbegin);
    // check initial size
    if (size < 2)
    {
        context.error("GraphPlanner::findLocalTrajectory(): Invalid workspace sequence size\n");
        return false;
    }
    // time out
    const MSecTmU32 segTimeOut = timeOut == MSEC_TM_U32_INF ? MSEC_TM_U32_INF : timeOut / MSecTmU32(size - 1);
    // fill trajectory with cbegin
    const Controller::State cinit = cbegin; // backup
    Controller::Trajectory::iterator end = ++trajectory.insert(iter, cinit);
    for (GenWorkspaceChainState::Seq::const_iterator i = wbegin; i != wend; ++i)
        end = ++trajectory.insert(end, cinit);
    Controller::Trajectory::iterator begin = end - size;

    getCallbackDataSync()->syncCollisionBounds();
    optimisedPath.resize(size - 1);
    population.assign(1, cbegin.cpos); // always keep initial solution in case no transformation is required
    pKinematics->setPopulation(&population);
    pKinematics->setDistRootFac(pKinematics->getDesc().distRootLocalFac);

    // find configspace trajectory
    PARAMETER_GUARD(Heuristic, GenCoordConfigspace, Min, *pHeuristic);
    PARAMETER_GUARD(Heuristic, GenCoordConfigspace, Max, *pHeuristic);
    for (size_t i = 1; i < size; ++i)
    {
        // pointers
        const Controller::Trajectory::iterator c[2] = {begin + i - 1, begin + i};
        const GenWorkspaceChainState::Seq::const_iterator w = wbegin + i - 1;

        // setup search limits
        GenCoordConfigspace min = pHeuristic->getMin();
        GenCoordConfigspace max = pHeuristic->getMax();
        for (Configspace::Index j = stateInfo.getJoints().begin(); j < stateInfo.getJoints().end(); ++j)
        {
            const idx_t k = j - stateInfo.getJoints().begin();
            min[j].pos = c[0]->cpos[j] - localFinderDesc.range[k];
            max[j].pos = c[0]->cpos[j] + localFinderDesc.range[k];
        }
        pHeuristic->setMin(min);
        pHeuristic->setMax(max);

        // and search for a solution
        if (!pKinematics->findGoal(*c[0], *w, *c[1], segTimeOut))
        {
            context.error("GraphPlanner::findLocalTrajectory(): unable to solve inverse kinematics\n");
            return false;
        }

        // visualisation
        optimisedPath[i - 1].cpos = c[1]->cpos;
        optimisedPath[i - 1].wpos = w->wpos;
    }

    // profile configspace trajectory
    pProfile->profile(trajectory, begin, end);

    getCallbackDataSync()->syncFindTrajectory(begin, end, &*(wend - 1));

#ifdef _HBGRAPHPLANNER_PERFMON
    context.write("GraphPlanner::findLocalTrajectory(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), size);
#ifdef _HBHEURISTIC_PERFMON
    if (heuristic)
    {
        context.write("Enabled Uncertainty %s\n", heuristic->enableUnc ? "ON" : "OFF");
        //heuristic->writeLog(context, "GraphPlanner::findTarget()");
        heuristic->getCollision()->writeLog(context, "GraphPlanner::findTarget()");
    }
#endif
#endif
    if (heuristic/* && heuristic->enableUnc*/)
        disableHandPlanning();
    return true;
}
bool RagGraphPlanner::findGlobalTrajectory(const Controller::State &begin, const Controller::State &end, Controller::Trajectory &trajectory, Controller::Trajectory::iterator iter, const GenWorkspaceChainState* wend)
{
    CriticalSectionWrapper csw(csCommand);

#ifdef _HBGRAPHPLANNER_PERFMON
    PerfTimer t;
#endif

#ifdef _HBGRAPHPLANNER_PERFMON
#ifdef _HBHEURISTIC_PERFMON
    HBHeuristic::resetLog();
    HBCollision::resetLog();
#endif
#ifdef _BOUNDS_PERFMON
    Bounds::resetLog();
#endif
#endif

    getCallbackDataSync()->syncCollisionBounds();

#ifdef _HBGRAPHPLANNER_PERFMON
    t.reset();
#endif
    // generate global graph only for the arm
    context.debug("GraphPlanner::findGlobalTrajectory(): Enabled Uncertainty %s. disable hand planning...\n",
        getHBHeuristic()->enableUnc ? "ON" : "OFF");
    disableHandPlanning();
    // context.write("findGlobalTrajectory(): %s\n", hbplannerDebug(*this).c_str());
    //context.write("GraphPlanner::findGlobalTrajectory(): %s\n", hbplannerConfigspaceDebug(*this).c_str());
    //context.write("GraphPlanner::findGlobalTrajectory(): %s\n", hbplannerWorkspaceDebug(*this).c_str());

    // generate global graph
    pGlobalPathFinder->generateOnlineGraph(begin.cpos, end.cpos);

    // find node path on global graph
    globalPath.clear();
    if (!pGlobalPathFinder->findPath(end.cpos, globalPath, globalPath.begin()))
    {
        context.error("GlobalPathFinder::findPath(): unable to find global path\n");
        return false;
    }
#ifdef _HBGRAPHPLANNER_PERFMON
    context.write("GlobalPathFinder::findPath(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), globalPath.size());
#endif

    if (pLocalPathFinder != NULL)
    {
#ifdef _HBGRAPHPLANNER_PERFMON
        t.reset();
#endif
        PARAMETER_GUARD(Heuristic, Real, Scale, *pHeuristic);
        for (U32 i = 0;;)
        {
            localPath = globalPath;
            if (localFind(begin.cpos, end.cpos, localPath))
                break;
            else if (++i > pathFinderDesc.numOfTrials)
            {
                context.error("LocalPathFinder::findPath(): unable to find local path\n");
                return false;
            }
        }
#ifdef _HBGRAPHPLANNER_PERFMON
        context.write("LocalPathFinder::findPath(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), localPath.size());
#endif
        // copy localPath
        optimisedPath = localPath;
    }
    else
    {
        // copy globalPath
        optimisedPath = globalPath;
    }

#ifdef _HBGRAPHPLANNER_PERFMON
#ifdef _HBHEURISTIC_PERFMON
    // context.debug("Enabled Uncertainty %s\n", getHBHeuristic()->enableUnc ? "ON" : "OFF");
    //HBHeuristic::writeLog(context, "PathFinder::find()");
    HBCollision::writeLog(context, "PathFinder::find()");
#endif
#ifdef _BOUNDS_PERFMON
    Bounds::writeLog(context, "PathFinder::find()");
#endif
#endif

#ifdef _HBGRAPHPLANNER_PERFMON
#ifdef _HBHEURISTIC_PERFMON
    HBHeuristic::resetLog();
    HBCollision::resetLog();
#endif
#ifdef _BOUNDS_PERFMON
    Bounds::resetLog();
#endif
    t.reset();
#endif

    optimize(optimisedPath, begin.cacc, end.cacc);

#ifdef _HBGRAPHPLANNER_PERFMON
    context.write("GraphPlanner::optimize(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), optimisedPath.size());
#ifdef _HBHEURISTIC_PERFMON
    HBHeuristic::writeLog(context, "GraphPlanner::optimize()");
    HBCollision::writeLog(context, "GraphPlanner::optimize()");
#endif
#ifdef _BOUNDS_PERFMON
    Bounds::writeLog(context, "GraphPlanner::optimize()");
#endif
#endif

#ifdef _HBGRAPHPLANNER_PERFMON
    t.reset();
#endif
    Controller::Trajectory::iterator iend = iter;
    pProfile->create(optimisedPath.begin(), optimisedPath.end(), begin, end, trajectory, iter, iend);
    pProfile->profile(trajectory, iter, iend);
#ifdef _HBGRAPHPLANNER_PERFMON
    context.write("GraphPlanner::profile(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), trajectory.size());
#endif

    getCallbackDataSync()->syncFindTrajectory(trajectory.begin(), trajectory.end(), wend);

    return true;
}
bool RagGraphPlanner::findTarget(const GenConfigspaceState &begin, const GenWorkspaceChainState& wend, GenConfigspaceState &cend)
{
    CriticalSectionWrapper csw(csCommand);

    HBHeuristic *heuristic = getHBHeuristic();
    if (heuristic)
    {
        enableUnc = heuristic->enableUnc;
        heuristic->enableUnc = false;
        context.debug("RagGraphPlanner::findTarget(): enable unc %s\n", heuristic->enableUnc ? "ON" : "OFF");
    }
    // TODO: Find why the pre-grasp pose returns with close fingers
    disableHandPlanning();
    // context.debug("findTarget: %s\n", hbplannerDebug(*this).c_str());
    //context.write("RagGraphPlanner::findTarget: %s\n", hbplannerConfigspaceDebug(*this).c_str());
    //context.write("RagGraphPlanner::findTarget: %s\n", hbplannerWorkspaceDebug(*this).c_str());

#ifdef _HBHEURISTIC_PERFMON
    heuristic->resetLog();
    heuristic->getCollision()->resetLog();
#endif
#ifdef _HBGRAPHPLANNER_PERFMON
    PerfTimer t;
#endif

    getCallbackDataSync()->syncCollisionBounds();

    // generate graph
    pGlobalPathFinder->generateOnlineGraph(begin.cpos, wend.wpos);

    // create waypoints pointers
    const Waypoint::Seq& graph = pGlobalPathFinder->getGraph();
    WaypointPtr::Seq waypointPtrGraph;
    waypointPtrGraph.reserve(graph.size());
    for (U32 i = 0; i < graph.size(); ++i)
        waypointPtrGraph.push_back(WaypointPtr(&graph[i]));

    // Create waypoint population
    const U32 populationSize = std::min(U32(pKinematics->getDesc().populationSize), (U32)waypointPtrGraph.size());

    // sort waypoints (pointers) from the lowest to the highest cost
    std::partial_sort(waypointPtrGraph.begin(), waypointPtrGraph.begin() + populationSize, waypointPtrGraph.end(), WaypointPtr::cost_less());

    // create initial population for kinematics solver
    ConfigspaceCoord::Seq population;
    population.reserve(populationSize);
    for (WaypointPtr::Seq::const_iterator i = waypointPtrGraph.begin(); population.size() < populationSize && i != waypointPtrGraph.end(); ++i)
        population.push_back((*i)->cpos);
    pKinematics->setPopulation(&population);

    // set global root distance factor
    pKinematics->setDistRootFac(pKinematics->getDesc().distRootGlobalFac);

    GenConfigspaceState root;
    root.setToDefault(controller.getStateInfo().getJoints().begin(), controller.getStateInfo().getJoints().end());
    root.cpos = graph[Node::IDX_ROOT].cpos;

    // find the goal state
    if (!pKinematics->findGoal(root, wend, cend))
    {
        context.error("GraphPlanner::findTarget(): unable to find target\n");
        return false;
    }

    cend.t = wend.t;
    cend.cvel.fill(REAL_ZERO);
    cend.cacc.fill(REAL_ZERO);

#ifdef _HBGRAPHPLANNER_PERFMON
    context.write("GraphPlanner::findTarget(): time_elapsed = %f [sec]\n", t.elapsed());
#endif
#ifdef _HBHEURISTIC_PERFMON
    heuristic->writeLog(context, "GraphPlanner::findTarget()");
    heuristic->getCollision()->writeLog(context, "GraphPlanner::findTarget()");
#endif
    if (heuristic)
        heuristic->enableUnc = enableUnc;
    //enableHandPlanning();
    // context.write("RagGraphPlanner::findTarget(): done.\n");
    return true;
}
int main(int argc, char *argv[])
{
    if (argc == 2)
    {
        push_to_graphite = std::string(argv[1]) == "graphite";
    }
    const char* pattern = ":1234567\r\n+3.14159\r\n";
    std::string input;
    for (int i = 0; i < TEST_ITERATIONS/2; i++)
    {
        input += pattern;
    }
    std::vector<double> timedeltas;
    uint64_t intvalue;
    Byte buffer[RESPStream::STRING_LENGTH_MAX];
    for (int i = N_TESTS; i --> 0;)
    {
        PerfTimer tm;
        MemStreamReader stream(input.data(), input.size());
        RESPStream protocol(&stream);
        for (int j = TEST_ITERATIONS; j --> 0;)
        {
            auto type = protocol.next_type();
            switch (type)
            {
            case RESPStream::INTEGER:
                intvalue = protocol.read_int();
                if (intvalue != 1234567)
                {
                    std::cerr << "Bad int value at " << j << std::endl;
                    return -1;
                }
                break;
            case RESPStream::STRING:
            {
                int len = protocol.read_string(buffer, sizeof(buffer));
                if (len != 7)
                {
                    std::cerr << "Bad string value at " << j << std::endl;
                    return -1;
                }
                char *p = buffer;
                double res = strtod(buffer, &p);
                // use the floating-point overload; plain abs would truncate to int
                if (std::fabs(res - 3.14159) > 0.0001)
                {
                    std::cerr << "Can't parse float at " << j << std::endl;
                    return -1;
                }
            }
            break;
            case RESPStream::ARRAY:
            case RESPStream::BAD:
            case RESPStream::BULK_STR:
            case RESPStream::ERROR:
            default:
                std::cerr << "Error at " << j << std::endl;
                return -1;
            };
        }
        timedeltas.push_back(tm.elapsed());
    }
    double min = std::numeric_limits<double>::max();
    for (auto t: timedeltas)
    {
        min = std::min(min, t);
    }
    std::cout << "Parsing " << TEST_ITERATIONS << " messages in " << min << " sec." << std::endl;
    if (push_to_graphite)
    {
        push_metric_to_graphite("respstream", 1000.0*min);
    }
    return 0;
}
int main(int argc, char** argv)
{
    const uint64_t N_TIMESTAMPS = 1000;
    const uint64_t N_PARAMS = 100;
    UncompressedChunk header;
    std::cout << "Testing timestamp sequence" << std::endl;
    int c = 100;
    std::vector<aku_ParamId> ids;
    for (uint64_t id = 0; id < N_PARAMS; id++)
    {
        ids.push_back(id);
    }
    RandomWalk rwalk(10.0, 0.0, 0.01, N_PARAMS);
    for (uint64_t id = 0; id < N_PARAMS; id++)
    {
        for (uint64_t ts = 0; ts < N_TIMESTAMPS; ts++)
        {
            header.paramids.push_back(ids[id]);
            int k = rand() % 2;
            if (k)
            {
                c++;
            }
            else if (c > 0)
            {
                c--;
            }
            header.timestamps.push_back((ts + c) << 8);
            header.values.push_back(rwalk.generate(0));
        }
    }

    ByteVector out;
    out.resize(N_PARAMS*N_TIMESTAMPS*24);

    const size_t UNCOMPRESSED_SIZE = header.paramids.size()*8      // Didn't count lengths and offsets
                                   + header.timestamps.size()*8    // because these arrays contain
                                   + header.values.size()*8;       // no information and should be
                                                                   // compressed to a few bytes

    struct Writer : ChunkWriter
    {
        ByteVector *out;
        Writer(ByteVector *out) : out(out) {}

        virtual aku_MemRange allocate()
        {
            aku_MemRange range = { out->data(), static_cast<uint32_t>(out->size()) };
            return range;
        }

        //! Commit changes
        virtual aku_Status commit(size_t bytes_written)
        {
            out->resize(bytes_written);
            return AKU_SUCCESS;
        }
    };

    Writer writer(&out);
    aku_Timestamp tsbegin, tsend;
    uint32_t n;
    auto status = CompressionUtil::encode_chunk(&n, &tsbegin, &tsend, &writer, header);
    if (status != AKU_SUCCESS)
    {
        std::cout << "Encoding error" << std::endl;
        return 1;
    }

    // Compress using zlib
    // Ids copy (zlib needs all input data to be aligned because it uses SSE2 internally)
    Bytef* pgz_ids = (Bytef*)aligned_alloc(64, header.paramids.size()*8);
    memcpy(pgz_ids, header.paramids.data(), header.paramids.size()*8);
    // Timestamps copy
    Bytef* pgz_ts = (Bytef*)aligned_alloc(64, header.timestamps.size()*8);
    memcpy(pgz_ts, header.timestamps.data(), header.timestamps.size()*8);
    // Values copy
    Bytef* pgz_val = (Bytef*)aligned_alloc(64, header.values.size()*8);
    memcpy(pgz_val, header.values.data(), header.values.size()*8);

    const auto gz_max_size = N_PARAMS*N_TIMESTAMPS*24;
    Bytef* pgzout = (Bytef*)aligned_alloc(64, gz_max_size);
    uLongf gzoutlen = gz_max_size;
    size_t total_gz_size = 0, id_gz_size = 0, ts_gz_size = 0, float_gz_size = 0;

    // compress param ids
    auto zstatus = compress(pgzout, &gzoutlen, pgz_ids, header.paramids.size()*8);
    if (zstatus != Z_OK)
    {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    id_gz_size = gzoutlen;
    gzoutlen = gz_max_size;

    // compress timestamps
    zstatus = compress(pgzout, &gzoutlen, pgz_ts, header.timestamps.size()*8);
    if (zstatus != Z_OK)
    {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    ts_gz_size = gzoutlen;
    gzoutlen = gz_max_size;

    // compress floats
    zstatus = compress(pgzout, &gzoutlen, pgz_val, header.values.size()*8);
    if (zstatus != Z_OK)
    {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    float_gz_size = gzoutlen;

    const float GZ_BPE = float(total_gz_size)/header.paramids.size();
    const float GZ_RATIO = float(UNCOMPRESSED_SIZE)/float(total_gz_size);

    const size_t COMPRESSED_SIZE = out.size();
    const float BYTES_PER_EL = float(COMPRESSED_SIZE)/header.paramids.size();
    const float COMPRESSION_RATIO = float(UNCOMPRESSED_SIZE)/COMPRESSED_SIZE;

    std::cout << "Uncompressed: " << UNCOMPRESSED_SIZE << std::endl
              << " compressed: " << COMPRESSED_SIZE << std::endl
              << " elements: " << header.paramids.size() << std::endl
              << " bytes/elem: " << BYTES_PER_EL << std::endl
              << " ratio: " << COMPRESSION_RATIO << std::endl;

    std::cout << "Gzip stats: " << std::endl
              << "bytes/elem: " << GZ_BPE << std::endl
              << " ratio: " << GZ_RATIO << std::endl
              << " id bytes: " << id_gz_size << std::endl
              << " ts bytes: " << ts_gz_size << std::endl
              << " val bytes: " << float_gz_size << std::endl;

    // Try to decompress
    UncompressedChunk decomp;
    const unsigned char* pbegin = out.data();
    const unsigned char* pend = pbegin + out.size();
    CompressionUtil::decode_chunk(&decomp, pbegin, pend, header.timestamps.size());

    bool first_error = true;
    for (auto i = 0u; i < header.timestamps.size(); i++)
    {
        if (header.timestamps.at(i) != decomp.timestamps.at(i) && first_error)
        {
            std::cout << "Error, bad timestamp at " << i << std::endl;
            first_error = false;
        }
        if (header.paramids.at(i) != decomp.paramids.at(i) && first_error)
        {
            std::cout << "Error, bad paramid at " << i << std::endl;
            first_error = false;
        }
        double origvalue = header.values.at(i);
        double decvalue = decomp.values.at(i);
        if (origvalue != decvalue && first_error)
        {
            std::cout << "Error, bad value at " << i << std::endl;
            std::cout << "Expected: " << origvalue << std::endl;
            std::cout << "Actual: " << decvalue << std::endl;
            first_error = false;
        }
    }

    if (argc == 2 && std::string(argv[1]) == "benchmark")
    {
        // Bench compression process
        const int NRUNS = 1000;
        PerfTimer tm;
        aku_Status tstatus;
        volatile uint32_t vn;
        ByteVector vec;
        for (int i = 0; i < NRUNS; i++)
        {
            vec.resize(N_PARAMS*N_TIMESTAMPS*24);
            Writer w(&vec);
            aku_Timestamp ts;
            uint32_t n;
            tstatus = CompressionUtil::encode_chunk(&n, &ts, &ts, &w, header);
            if (tstatus != AKU_SUCCESS)
            {
                std::cout << "Encoding error" << std::endl;
                return 1;
            }
            vn = n;
        }
        double elapsed = tm.elapsed();
        std::cout << "Elapsed (akumuli): " << elapsed << " " << vn << std::endl;

        tm.restart();
        for (int i = 0; i < NRUNS; i++)
        {
            uLongf offset = 0;
            gzoutlen = gz_max_size;  // reset the available output size for this run

            // compress param ids
            auto zstatus = compress(pgzout, &gzoutlen, pgz_ids, header.paramids.size()*8);
            if (zstatus != Z_OK)
            {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
            offset += gzoutlen;
            gzoutlen = gz_max_size - offset;

            // compress timestamps
            zstatus = compress(pgzout + offset, &gzoutlen, pgz_ts, header.timestamps.size()*8);
            if (zstatus != Z_OK)
            {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
            offset += gzoutlen;
            gzoutlen = gz_max_size - offset;

            // compress floats
            zstatus = compress(pgzout + offset, &gzoutlen, pgz_val, header.values.size()*8);
            if (zstatus != Z_OK)
            {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
        }
        elapsed = tm.elapsed();
        std::cout << "Elapsed (zlib): " << elapsed << " " << vn << std::endl;
    }
}
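// The PerfTimer in the RESP and compression benchmarks above follows a
// different convention from the start()/stop(&out) variant earlier in this
// collection: it starts timing on construction and is queried with elapsed()
// and restart(). A minimal std::chrono sketch of that shape (seconds assumed,
// to match the "sec" output above) is given here for illustration; the real
// class may differ.
#include <chrono>

struct ScopedPerfTimerSketch
{
    std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();

    // Seconds since construction or the last restart().
    double elapsed() const
    {
        return std::chrono::duration<double>(std::chrono::steady_clock::now() - begin).count();
    }

    void restart() { begin = std::chrono::steady_clock::now(); }
};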
// this callback is called every second
void callback(void*)
{
    PerfTimer timer;

    initGraphics();

    _gl_main->redraw();
    //Fl::repeat_timeout(.5, callback);
    //return;

    if (!_gl_main->pause_video->value())
    {
        Fl::repeat_timeout(_tempo, callback);
        return;
    }

    // refresh the control bar
    _wind->child(2)->redraw();

    // for performance reasons...
    _gl_main->_camera->setActiveSensor(_gl_main->_calibrated_camera);

    // check database first
    if (_gl_main->_database == NULL)
    {
        // grab ladybug images
        if (_gl_main->_ladybug->_init)
        {
            _gl_main->_ladybug->grab(_gl_main->_camera->_frame._grab_img);
            _gl_main->_camera->_frame.processFrame();
            _tempo = 0.05;
        }
        else
        {
            _tempo = 1.0;
        }
    }

    // save frame if recording
    if (_gl_main->_ladybug_recording)
    {
        _tempo = .5;
        _gl_main->_ladybug->grab(_gl_main->_camera->_frame._grab_img); // grab an image on the ladybug
        _gl_main->_camera->_frame.processFrame();

        for (int sensorId = 0; sensorId < 6; sensorId++)
        {
            std::string filename = _gl_main->_database->_dirname + "//" +
                _gl_main->_database->getLadybugImageFilename(sensorId, _gl_main->frameId);
            cvSaveImage(filename.c_str(), _gl_main->_camera->_frame._tmp_img[sensorId]);
        }

        _gl_main->_ladybug_record_nimages++;
        _gl_main->frameId++;
        _gl_main->_database->_frameId = _gl_main->frameId;
        _gl_main->redraw();
    }

    // compute FAST features and update the tracker
    if (_gl_main->view_flow->value())
    {
    }

    // refresh the main window
    if ((_gl_main->_windowMode == MODE_VIDEO) || (_gl_main->_windowMode == MODE_CALIBRATION))
        _gl_main->redraw();

    // refresh the main window
    if (_gl_main->_windowMode == MODE_SPHERE)
        _gl_main->redraw();

    _gl_control->_perf_time = timer.elapsed();

    //Fl::repeat_timeout(MAX(0.05,1.5*_gl_control->_perf_time), callback);
    Fl::repeat_timeout(_tempo, callback);
}
void TIGERImport(const ChainInfoMap& chains, const LandmarkInfoMap& landmarks, const PolygonInfoMap& polygons, Pmwx& outMap)
{
    // Our planar map MUST be empty!

    // First we go in and insert every segment from the TIGER database into our map.
    // We keep a table from coordinates into the map so we can avoid doing a search
    // when we have a segment that is already at least partially inserted.

    VertexIndex vertices;
    EdgeIndex edges;
    FaceIndex faces;

    set<TLID> badTLIDs;

    int gSegs = 0, gBad = 0, gDupes = 0;
    set<TLID> tlids;
#if DO_CHECKS
    map<string, TLID> lines;
#endif

    for (ChainInfoMap::const_iterator chain = chains.begin(); chain != chains.end(); ++chain)
    {
        int i = LookupNetCFCC(chain->second.cfcc.c_str());
        vector<RawCoordPair> pts = chain->second.shape;
        pts.insert(pts.begin(), chain->second.start);
        pts.insert(pts.end(), chain->second.end);

        if (tlids.find(chain->first) != tlids.end())
        {
            // printf("WARNING: about to dupe a TLID!\n");
            continue;
        }
        tlids.insert(chain->first);

        for (int n = 1; n < pts.size(); ++n)
        {
            ++gSegs;
#if DO_CHECKS
            string masterkey, k1 = RawCoordToKey(pts[n-1]), k2 = RawCoordToKey(pts[n]);
            if (k1 < k2)
                masterkey = k1 + k2;
            else
                masterkey = k2 + k1;
            map<string, TLID>::iterator tlidCheck = lines.find(masterkey);
            if (tlidCheck != lines.end())
            {
                printf("WARNING: already did this seg (by key), old TLID = %u, new TLID = %u!\n",
                    tlidCheck->second, chain->first);
                printf("Sequence in question is: %s\n", tlidCheck->first.c_str());
                continue;
            }
            lines.insert(map<string, TLID>::value_type(masterkey, chain->first));
#endif
            try
            {
                Pmwx::Halfedge_handle he = InsertOneSegment(pts[n-1], pts[n], vertices, outMap);
                if (he != outMap.halfedges_end())
                {
                    // InsertOneSegment always returns the dominant half-edge. Tag it with
                    // our road type, underpassing info, and our TLID.
                    if (i != -1)
                    {
                        GISNetworkSegment_t nl;
                        nl.type = kRoadCodes[i].network_type;
                        he->mSegments.push_back(nl);
                        he->mParams[gis_TIGER_IsUnderpassing] = kRoadCodes[i].underpassing;
                    }
                    he->mParams[gis_TIGER_TLID] = chain->first;
                    he->twin()->mParams[gis_TIGER_TLID] = chain->first;
                    edges[chain->first] = he;
                }
                else
                {
                    printf("Got dupe seg, CFCC = %s, name = %s, tlid = %d\n",
                        chain->second.cfcc.c_str(), chain->second.name.c_str(), chain->first);
                    ++gDupes;
                }
            }
            catch (...)
            {
                ++gBad;
                printf("Got bad seg, CFCC = %s, name = %s, tlid = %d\n",
                    chain->second.cfcc.c_str(), chain->second.name.c_str(), chain->first);
                badTLIDs.insert(chain->first);
            }

            if ((gSegs % 10000) == 0)
            {
                fprintf(stdout, ".");
                fflush(stdout);
            }
        }
    }
    std::cout << "\nTotal " << gSegs << " bad " << gBad << " dupes " << gDupes << "\n";

    double elapsed;
    unsigned long calls;
    double ave;
    zeroV.GetStats(elapsed, calls);
    ave = elapsed / (double) calls;
    printf("In-face insertion: %f total, %lu calls, %f average.\n", elapsed, calls, ave);
    oneV.GetStats(elapsed, calls);
    ave = elapsed / (double) calls;
    printf("One-V Insertion: %f total, %lu calls, %f average.\n", elapsed, calls, ave);
    twoV.GetStats(elapsed, calls);
    ave = elapsed / (double) calls;
    printf("Two-V insertion: %f total, %lu calls, %f average.\n", elapsed, calls, ave);

    // Now we go in and apply our polygon data. We have set the dominant flag to be the halfedge
    // that goes in the same direction as the tiger database. Since CGAL faces have CCW outer
    // boundaries, that means that the left hand poly of a TLID is adjacent to the dominant
    // halfedge.

    int gPolys = 0, gMissingTLIDs = 0, gBadEdges = 0, gBadBackLink = 0, gDeadTLID = 0;
    for (PolygonInfoMap::const_iterator poly = polygons.begin(); poly != polygons.end(); ++poly)
    {
        if (poly->first == WORLD_POLY)
            continue;
        ++gPolys;
        set<TLID> ourTLIDs;
        for (DirectedTLIDVector::const_iterator t = poly->second.border.begin(); t != poly->second.border.end(); ++t)
            ourTLIDs.insert(t->first);

        vector<TLID> ourBads;
        set_intersection(ourTLIDs.begin(), ourTLIDs.end(), badTLIDs.begin(), badTLIDs.end(),
            back_insert_iterator<vector<TLID> >(ourBads));
        if (!ourBads.empty())
        {
            printf("Skipped Polygon because one of its TLIDs is missing from the DB!\n");
            ++gDeadTLID;
            continue;
        }

        EdgeIndex::iterator edgeIter = edges.find(*ourTLIDs.begin());
        ChainInfoMap::const_iterator tlidIter = chains.find(*ourTLIDs.begin());
        if (edgeIter != edges.end() && tlidIter != chains.end())
        {
            Pmwx::Face_handle our_face = outMap.faces_end();
            Pmwx::Halfedge_handle he = edgeIter->second;
            if (!he->mDominant)
                he = he->twin();
            if (!he->mDominant)
            {
                ++gBadEdges;
                printf("WARNING: Halfedge with no dominance!!\n");
                continue;
            }

            if (poly->first == tlidIter->second.lpoly)
            {
                our_face = he->face();
            }
            else if (poly->first == tlidIter->second.rpoly)
            {
                our_face = he->twin()->face();
            }
            else
            {
                printf("WARNING: TLID from poly not backlinked to our poly!\n");
                ++gBadBackLink;
            }

            if (our_face != outMap.faces_end())
            {
                if (poly->second.water)
                {
                    our_face->mLandClass = lc_GenericWater;
                }
                faces[poly->first] = our_face;
            }
        }
        else
            ++gMissingTLIDs;
    }
    printf("Polygons: %d, missing TLIDs from indices: %d, edges with no dominance: %d, bad back links: %d, dead TLIDS: %d\n",
        gPolys, gMissingTLIDs, gBadEdges, gBadBackLink, gDeadTLID);

    int gLand = 0, gNoID = 0, gNoLocAtAll = 0, gPtOnEdge = 0;
    for (LandmarkInfoMap::const_iterator landmark = landmarks.begin(); landmark != landmarks.end(); ++landmark)
    {
        ++gLand;
        int cfcc = LookupAreaCFCC(landmark->second.cfcc.c_str());
        if (cfcc != -1)
        {
            if (!landmark->second.cenid_polyid.empty())
            {
                FaceIndex::iterator theFace = faces.find(landmark->second.cenid_polyid);
                if (theFace != faces.end())
                {
                    theFace->second->mLandClass = kAreaCodes[cfcc].land_class;
                }
                else
                {
                    fprintf(stderr, "WARNING: Cenid/polyid not found.\n");
                    ++gNoID;
                }
            }
            else if (!landmark->second.location.first.empty())
            {
                if (kAreaCodes[cfcc].allow_from_point)
                {
                    try
                    {
                        Pmwx::Locate_type lt;
                        Pmwx::Halfedge_handle h = outMap.locate(RawCoordToCoord(landmark->second.location), lt);
                        if (lt == Pmwx::EDGE || lt == Pmwx::FACE)
                        {
                            h->face()->mLandClass = kAreaCodes[cfcc].land_class;
                        }
                        else
                        {
                            ++gPtOnEdge;
                            fprintf(stderr, "WARNING: Pt land mark on vertex or out of map.\n");
                        }
                    }
                    catch (...)
                    {
                        ++gPtOnEdge;
                    }
                }
                else
                {
                    // TODO: Add pt object
                }
            }
            else
            {
                fprintf(stderr, "Warning: landmark without polygon or pt.\n");
                gNoLocAtAll++;
            }
        }
    }
    printf("Total landmarks = %d, total with unknown CENID/POLYID = %d, no Loc = %d, pt on edge = %d\n",
        gLand, gNoID, gNoLocAtAll, gPtOnEdge);
}
Pmwx::Halfedge_handle InsertOneSegment(
            const RawCoordPair& p1,
            const RawCoordPair& p2,
            VertexIndex& index,
            Pmwx& ioMap)
{
    string key1 = RawCoordToKey(p1);
    Point_2 pt1 = RawCoordToCoord(p1);
    string key2 = RawCoordToKey(p2);
    Point_2 pt2 = RawCoordToCoord(p2);

    VertexIndex::iterator i1 = index.find(key1);
    VertexIndex::iterator i2 = index.find(key2);
    Pmwx::Halfedge_handle he = Pmwx::Halfedge_handle();

#if 0
    Pmwx::Locate_type loc1, loc2;
    ioMap.locate(pt1, loc1);
    ioMap.locate(pt2, loc2);
    CGAL_precondition_msg(loc1 != Pmwx::EDGE, "Pt1 on an edge, will cause CHAOS");
    CGAL_precondition_msg(loc2 != Pmwx::EDGE, "Pt2 on an edge, will cause CHAOS");
    if (i1 == index.end())
        CGAL_precondition_msg(loc1 != Pmwx::VERTEX, "Pt1 on an unindexed vertex, will cause CHAOS");
    if (i2 == index.end())
        CGAL_precondition_msg(loc2 != Pmwx::VERTEX, "Pt2 on an unindexed vertex, will cause CHAOS");
#endif

    if (i1 == index.end())
    {
        if (i2 == index.end())
        {
            // Totally unknown segment.
            Pmwx::Locate_type lt;
            zeroV.Start();
            he = ioMap.locate(pt1, lt);
            CGAL_precondition_msg(lt == Pmwx::FACE || lt == Pmwx::UNBOUNDED_FACE,
                "Inserting a segment in unknown territory but it's NOT on a face!!");
            Pmwx::Face_handle fe = (lt == Pmwx::UNBOUNDED_FACE) ? ioMap.unbounded_face() : he->face();
            // he = ioMap.non_intersecting_insert(PM_Curve_2(pt1, pt2));
            he = ioMap.insert_in_face_interior(PM_Curve_2(pt1, pt2), fe);
            zeroV.Stop();
            if (he != Pmwx::Halfedge_handle())
            {
                index[key1] = he->source();
                index[key2] = he->target();
            }
        }
        else
        {
            // We know pt 2 but pt 1 is floating. Make a vector
            // using the vertex handle from 2 and 1's raw value.
            oneV.Start();
            he = ioMap.Planar_map_2::insert_from_vertex(
                PM_Curve_2(i2->second->point(), pt1), i2->second);
            oneV.Stop();
            // Now pt 1 gets stored...it is the target of the new halfedge.
            if (he != Pmwx::Halfedge_handle())
            {
                index[key1] = he->target();
                he = he->twin(); // This halfedge goes from 2 to 1, turn it around!
            }
        }
    }
    else
    {
        if (i2 == index.end())
        {
            oneV.Start();
            // We know pt 1 but not pt 2
            he = ioMap.Planar_map_2::insert_from_vertex(
                PM_Curve_2(i1->second->point(), pt2), i1->second);
            oneV.Stop();
            // Now pt 2 gets stored...it is the target of the new halfedge.
            if (he != Pmwx::Halfedge_handle())
                index[key2] = he->target();
        }
        else
        {
            twoV.Start();
            // Both pts are known
            he = ioMap.Planar_map_2::insert_at_vertices(
                PM_Curve_2(i1->second->point(), i2->second->point()), i1->second, i2->second);
            twoV.Stop();
        }
    }

    if (he == Pmwx::Halfedge_handle())
    {
        return ioMap.halfedges_end();
    }

    // Whenever we create a half edge we have to pick dominance...this works.
    he->mDominant = true;
    return he;
}
//-------------------------------------------------------------------------------------------------------------
// UTILITY FUNCTIONS
//-------------------------------------------------------------------------------------------------------------
bool Shader::CompileShaders(ID3D11Device* device, const ShaderDesc& desc)
{
    constexpr const char * SHADER_BINARY_EXTENSION = ".bin";
    mDescriptor = desc;
    HRESULT result;
    ShaderBlobs blobs;
    bool bPrinted = false;

    PerfTimer timer;
    timer.Start();

    // COMPILE SHADER STAGES
    //----------------------------------------------------------------------------
    for (const ShaderStageDesc& stageDesc : desc.stages)
    {
        if (stageDesc.fileName.empty())
            continue;

        // stage.macros
        const std::string sourceFilePath = std::string(Renderer::sShaderRoot + stageDesc.fileName);
        const EShaderStage stage = GetShaderTypeFromSourceFilePath(sourceFilePath);

        // USE SHADER CACHE
        //
        const size_t ShaderHash = GeneratePreprocessorDefinitionsHash(stageDesc.macros);
        const std::string cacheFileName = stageDesc.macros.empty()
            ? DirectoryUtil::GetFileNameFromPath(sourceFilePath) + SHADER_BINARY_EXTENSION
            : DirectoryUtil::GetFileNameFromPath(sourceFilePath) + "_" + std::to_string(ShaderHash) + SHADER_BINARY_EXTENSION;
        const std::string cacheFilePath = Application::s_ShaderCacheDirectory + "\\" + cacheFileName;
        const bool bUseCachedShaders = DirectoryUtil::FileExists(cacheFilePath)
            && !IsCacheDirty(sourceFilePath, cacheFilePath);
        //---------------------------------------------------------------------------------
        if (!bPrinted) // quick status print here
        {
            const char* pMsgLoad = bUseCachedShaders ? "Loading cached shader binaries" : "Compiling shader from source";
            Log::Info("\t%s %s...", pMsgLoad, mName.c_str());
            bPrinted = true;
        }
        //---------------------------------------------------------------------------------
        if (bUseCachedShaders)
        {
            blobs.of[stage] = CompileFromCachedBinary(cacheFilePath);
        }
        else
        {
            std::string errMsg;
            ID3D10Blob* pBlob;
            if (CompileFromSource(sourceFilePath, stage, pBlob, errMsg, stageDesc.macros))
            {
                blobs.of[stage] = pBlob;
                CacheShaderBinary(cacheFilePath, blobs.of[stage]);
            }
            else
            {
                Log::Error(errMsg);
                return false;
            }
        }

        CreateShaderStage(device, stage, blobs.of[stage]->GetBufferPointer(), blobs.of[stage]->GetBufferSize());
        SetReflections(blobs);
        //CheckSignatures();

        ShaderLoadDesc loadDesc = {};
        loadDesc.fullPath = sourceFilePath;
        loadDesc.lastWriteTime = std::experimental::filesystem::last_write_time(sourceFilePath);
        mDirectories[stage] = loadDesc;
    }

    // INPUT LAYOUT (VS)
    //---------------------------------------------------------------------------
    // src: https://stackoverflow.com/questions/42388979/directx-11-vertex-shader-reflection
    // setup the layout of the data that goes into the shader
    //
    if (mReflections.vsRefl)
    {
        D3D11_SHADER_DESC shaderDesc = {};
        mReflections.vsRefl->GetDesc(&shaderDesc);
        std::vector<D3D11_INPUT_ELEMENT_DESC> inputLayout(shaderDesc.InputParameters);
        D3D_PRIMITIVE primitiveDesc = shaderDesc.InputPrimitive;

        for (unsigned i = 0; i < shaderDesc.InputParameters; ++i)
        {
            D3D11_SIGNATURE_PARAMETER_DESC paramDesc;
            mReflections.vsRefl->GetInputParameterDesc(i, &paramDesc);

            // fill out input element desc
            D3D11_INPUT_ELEMENT_DESC elementDesc;
            elementDesc.SemanticName = paramDesc.SemanticName;
            elementDesc.SemanticIndex = paramDesc.SemanticIndex;
            elementDesc.InputSlot = 0;
            elementDesc.AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
            elementDesc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
            elementDesc.InstanceDataStepRate = 0;

            // determine DXGI format
            if (paramDesc.Mask == 1)
            {
                if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)       elementDesc.Format = DXGI_FORMAT_R32_UINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32_SINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32_FLOAT;
            }
            else if (paramDesc.Mask <= 3)
            {
                if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)       elementDesc.Format = DXGI_FORMAT_R32G32_UINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32_SINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32_FLOAT;
            }
            else if (paramDesc.Mask <= 7)
            {
                if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)       elementDesc.Format = DXGI_FORMAT_R32G32B32_UINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32_SINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
            }
            else if (paramDesc.Mask <= 15)
            {
                if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)       elementDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32A32_SINT;
                else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
            }

            inputLayout[i] = elementDesc; // save element desc
        }

        // Try to create Input Layout
        const auto* pData = inputLayout.data();
        if (pData)
        {
            result = device->CreateInputLayout(
                pData, shaderDesc.InputParameters,
                blobs.vs->GetBufferPointer(), blobs.vs->GetBufferSize(), &mpInputLayout);
            if (FAILED(result))
            {
                OutputDebugString("Error creating input layout");
                return false;
            }
        }
    }

    // CONSTANT BUFFERS
    //---------------------------------------------------------------------------
    // Obtain cbuffer layout information
    for (EShaderStage type = EShaderStage::VS; type < EShaderStage::COUNT; type = (EShaderStage)(type + 1))
    {
        if (mReflections.of[type])
        {
            ReflectConstantBufferLayouts(mReflections.of[type], type);
        }
    }

    // Create CPU & GPU constant buffers
    // CPU CBuffers
    int constantBufferSlot = 0;
    for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
    {
        std::vector<CPUConstantID> cpuBuffers;
        for (D3D11_SHADER_VARIABLE_DESC varDesc : cbLayout.variables)
        {
            CPUConstant c;
            CPUConstantID c_id = static_cast<CPUConstantID>(mCPUConstantBuffers.size());

            c._name = varDesc.Name;
            c._size = varDesc.Size;
            c._data = new char[c._size];
            memset(c._data, 0, c._size);

            m_constants.push_back(std::make_pair(constantBufferSlot, c_id));
            mCPUConstantBuffers.push_back(c);
        }
        ++constantBufferSlot;
    }

    // GPU CBuffers
    D3D11_BUFFER_DESC cBufferDesc;
    cBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
    cBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
    cBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    cBufferDesc.MiscFlags = 0;
    cBufferDesc.StructureByteStride = 0;
    for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
    {
        ConstantBufferBinding cBuffer;
        cBufferDesc.ByteWidth = cbLayout.desc.Size;
        if (FAILED(device->CreateBuffer(&cBufferDesc, NULL, &cBuffer.data)))
        {
            OutputDebugString("Error creating constant buffer");
            return false;
        }
        cBuffer.dirty = true;
        cBuffer.shaderStage = cbLayout.stage;
        cBuffer.bufferSlot = cbLayout.bufSlot;
        mConstantBuffers.push_back(cBuffer);
    }

    // TEXTURES & SAMPLERS
    //---------------------------------------------------------------------------
    for (int shaderStage = 0; shaderStage < EShaderStage::COUNT; ++shaderStage)
    {
        unsigned texSlot = 0;
        unsigned smpSlot = 0;
        unsigned uavSlot = 0;
        auto& sRefl = mReflections.of[shaderStage];
        if (sRefl)
        {
            D3D11_SHADER_DESC desc = {};
            sRefl->GetDesc(&desc);

            for (unsigned i = 0; i < desc.BoundResources; ++i)
            {
                D3D11_SHADER_INPUT_BIND_DESC shdInpDesc;
                sRefl->GetResourceBindingDesc(i, &shdInpDesc);
                switch (shdInpDesc.Type)
                {
                case D3D_SIT_SAMPLER:
                {
                    SamplerBinding smp;
                    smp.shaderStage = static_cast<EShaderStage>(shaderStage);
                    smp.samplerSlot = smpSlot++;
                    mSamplerBindings.push_back(smp);
                    mShaderSamplerLookup[shdInpDesc.Name] = static_cast<int>(mSamplerBindings.size() - 1);
                }
                break;
                case D3D_SIT_TEXTURE:
                {
                    TextureBinding tex;
                    tex.shaderStage = static_cast<EShaderStage>(shaderStage);
                    tex.textureSlot = texSlot++;
                    mTextureBindings.push_back(tex);
                    mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
                }
                break;
                case D3D_SIT_UAV_RWTYPED:
                {
                    TextureBinding tex;
                    tex.shaderStage = static_cast<EShaderStage>(shaderStage);
                    tex.textureSlot = uavSlot++;
                    mTextureBindings.push_back(tex);
                    mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
                }
                break;
                case D3D_SIT_CBUFFER:
                    break;
                default:
                    Log::Warning("Unhandled shader input bind type in shader reflection");
                    break;
                } // switch shader input type
            } // bound resource
        } // sRefl
    } // shaderStage

    // release blobs
    for (unsigned type = EShaderStage::VS; type < EShaderStage::COUNT; ++type)
    {
        if (blobs.of[type])
            blobs.of[type]->Release();
    }

    return true;
}
int _tmain(int argc, _TCHAR* argv[])
{
    // Sample 1: float image, 1 band, with some pixels set to invalid / void, maxZError = 0.1
    int h = 512;
    int w = 512;

    float* zImg = new float[w * h];
    memset(zImg, 0, w * h * sizeof(float));

    LercNS::BitMask bitMask(w, h);
    bitMask.SetAllValid();

    for (int k = 0, i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++, k++)
        {
            zImg[k] = sqrt((float)(i * i + j * j));    // smooth surface
            zImg[k] += rand() % 20;                    // add some small amplitude noise

            if (j % 100 == 0 || i % 100 == 0)          // set some void points
                bitMask.SetInvalid(k);
        }
    }

    // compress into byte arr
    double maxZErrorWanted = 0.1;
    double eps = 0.0001;    // safety margin (optional), to account for finite floating point accuracy
    double maxZError = maxZErrorWanted - eps;

    size_t numBytesNeeded = 0;
    size_t numBytesWritten = 0;
    Lerc lerc;
    PerfTimer pt;

    if (!lerc.ComputeBufferSize((void*)zImg,    // raw image data, row by row, band by band
        Lerc::DT_Float, w, h, 1,
        &bitMask,           // set 0 if all pixels are valid
        maxZError,          // max coding error per pixel, or precision
        numBytesNeeded))    // size of outgoing Lerc blob
    {
        cout << "ComputeBufferSize failed" << endl;
    }

    size_t numBytesBlob = numBytesNeeded;
    Byte* pLercBlob = new Byte[numBytesBlob];

    pt.start();

    if (!lerc.Encode((void*)zImg,    // raw image data, row by row, band by band
        Lerc::DT_Float, w, h, 1,
        &bitMask,            // 0 if all pixels are valid
        maxZError,           // max coding error per pixel, or precision
        pLercBlob,           // buffer to write to, function will fail if buffer too small
        numBytesBlob,        // buffer size
        numBytesWritten))    // num bytes written to buffer
    {
        cout << "Encode failed" << endl;
    }

    pt.stop();

    double ratio = w * h * (0.125 + sizeof(float)) / numBytesBlob;
    cout << "sample 1 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

    // new data storage
    float* zImg3 = new float[w * h];
    memset(zImg3, 0, w * h * sizeof(float));

    BitMask bitMask3(w, h);
    bitMask3.SetAllValid();

    // decompress
    Lerc::LercInfo lercInfo;
    if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
        cout << "get header info failed" << endl;

    if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 1 || lercInfo.dt != Lerc::DT_Float)
        cout << "got wrong lerc info" << endl;

    pt.start();

    if (!lerc.Decode(pLercBlob, numBytesBlob, &bitMask3, w, h, 1, Lerc::DT_Float, (void*)zImg3))
        cout << "decode failed" << endl;

    pt.stop();

    // compare to orig
    double maxDelta = 0;
    for (int k = 0, i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++, k++)
        {
            if (bitMask3.IsValid(k) != bitMask.IsValid(k))
                cout << "Error in main: decoded bit mask differs from encoded bit mask" << endl;

            if (bitMask3.IsValid(k))
            {
                double delta = fabs(zImg3[k] - zImg[k]);
                if (delta > maxDelta)
                    maxDelta = delta;
            }
        }
    }

    cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

    delete[] zImg;
    delete[] zImg3;
    delete[] pLercBlob;
    pLercBlob = 0;

    // Sample 2: random byte image, 3 bands, all pixels valid, maxZError = 0 (lossless)
    h = 713;
    w = 257;

    Byte* byteImg = new Byte[w * h * 3];
    memset(byteImg, 0, w * h * 3);

    for (int iBand = 0; iBand < 3; iBand++)
    {
        Byte* arr = byteImg + iBand * w * h;
        for (int k = 0, i = 0; i < h; i++)
            for (int j = 0; j < w; j++, k++)
                arr[k] = rand() % 30;
    }

    // encode
    if (!lerc.ComputeBufferSize((void*)byteImg, Lerc::DT_Byte, w, h, 3, 0, 0, numBytesNeeded))
        cout << "ComputeBufferSize failed" << endl;

    numBytesBlob = numBytesNeeded;
    pLercBlob = new Byte[numBytesBlob];

    pt.start();

    if (!lerc.Encode((void*)byteImg,    // raw image data, row by row, band by band
        Lerc::DT_Byte, w, h, 3,
        0,                   // 0 if all pixels are valid
        0,                   // max coding error per pixel, or precision
        pLercBlob,           // buffer to write to, function will fail if buffer too small
        numBytesBlob,        // buffer size
        numBytesWritten))    // num bytes written to buffer
    {
        cout << "Encode failed" << endl;
    }

    pt.stop();

    ratio = w * h * 3 / (double)numBytesBlob;
    cout << "sample 2 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

    // new data storage
    Byte* byteImg3 = new Byte[w * h * 3];
    memset(byteImg3, 0, w * h * 3);

    // decompress
    if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
        cout << "get header info failed" << endl;

    if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 3 || lercInfo.dt != Lerc::DT_Byte)
        cout << "got wrong lerc info" << endl;

    pt.start();

    if (!lerc.Decode(pLercBlob, numBytesBlob, 0, w, h, 3, Lerc::DT_Byte, (void*)byteImg3))
        cout << "decode failed" << endl;

    pt.stop();

    // compare to orig
    maxDelta = 0;
    for (int k = 0, i = 0; i < h; i++)
        for (int j = 0; j < w; j++, k++)
        {
            double delta = abs(byteImg3[k] - byteImg[k]);
            if (delta > maxDelta)
                maxDelta = delta;
        }

    cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

    delete[] byteImg;
    delete[] byteImg3;
    delete[] pLercBlob;
    pLercBlob = 0;

#ifdef TestLegacyData

    Byte* pLercBuffer = new Byte[4 * 2048 * 2048];
    Byte* pDstArr = new Byte[4 * 2048 * 2048];

    vector<string> fnVec;
    string path = "D:/GitHub/LercOpenSource/testData/";

    fnVec.push_back("amazon3.lerc1");
    fnVec.push_back("tuna.lerc1");
    fnVec.push_back("tuna_0_to_1_w1920_h925.lerc1");
    fnVec.push_back("testbytes.lerc2");
    fnVec.push_back("testHuffman_w30_h20_uchar0.lerc2");
    fnVec.push_back("testHuffman_w30_h20_ucharx.lerc2");
    fnVec.push_back("testHuffman_w1922_h1083_uchar.lerc2");
    fnVec.push_back("testall_w30_h20_char.lerc2");
    fnVec.push_back("testall_w30_h20_byte.lerc2");
    fnVec.push_back("testall_w30_h20_short.lerc2");
    fnVec.push_back("testall_w30_h20_ushort.lerc2");
    fnVec.push_back("testall_w30_h20_long.lerc2");
    fnVec.push_back("testall_w30_h20_ulong.lerc2");
    fnVec.push_back("testall_w30_h20_float.lerc2");
    fnVec.push_back("testall_w1922_h1083_char.lerc2");
    fnVec.push_back("testall_w1922_h1083_byte.lerc2");
    fnVec.push_back("testall_w1922_h1083_short.lerc2");
    fnVec.push_back("testall_w1922_h1083_ushort.lerc2");
    fnVec.push_back("testall_w1922_h1083_long.lerc2");
    fnVec.push_back("testall_w1922_h1083_ulong.lerc2");
    fnVec.push_back("testall_w1922_h1083_float.lerc2");
    fnVec.push_back("testuv_w30_h20_char.lerc2");
    fnVec.push_back("testuv_w30_h20_byte.lerc2");
    fnVec.push_back("testuv_w30_h20_short.lerc2");
    fnVec.push_back("testuv_w30_h20_ushort.lerc2");
    fnVec.push_back("testuv_w30_h20_long.lerc2");
    fnVec.push_back("testuv_w30_h20_ulong.lerc2");
    fnVec.push_back("testuv_w30_h20_float.lerc2");
    fnVec.push_back("testuv_w1922_h1083_char.lerc2");
    fnVec.push_back("testuv_w1922_h1083_byte.lerc2");
    fnVec.push_back("testuv_w1922_h1083_short.lerc2");
    fnVec.push_back("testuv_w1922_h1083_ushort.lerc2");
    fnVec.push_back("testuv_w1922_h1083_long.lerc2");
    fnVec.push_back("testuv_w1922_h1083_ulong.lerc2");
    fnVec.push_back("testuv_w1922_h1083_float.lerc2");

    for (size_t n = 0; n < fnVec.size(); n++)
    {
        string fn = path;
        fn += fnVec[n];

        FILE* fp = 0;
        fopen_s(&fp, fn.c_str(), "rb");
        fseek(fp, 0, SEEK_END);
        size_t fileSize = ftell(fp);    // get the file size
        fclose(fp);
        fp = 0;

        fopen_s(&fp, fn.c_str(), "rb");
        fread(pLercBuffer, 1, fileSize, fp);    // read Lerc blob into buffer
        fclose(fp);
        fp = 0;

        if (!lerc.GetLercInfo(pLercBuffer, fileSize, lercInfo))
            cout << "get header info failed" << endl;
        else
        {
            int w = lercInfo.nCols;
            int h = lercInfo.nRows;
            int nBands = lercInfo.nBands;
            Lerc::DataType dt = lercInfo.dt;

            pt.start();

            std::string resultMsg = "ok";
            BitMask bitMask;
            if (!lerc.Decode(pLercBuffer, fileSize, &bitMask, w, h, nBands, dt, (void*)pDstArr))
                resultMsg = "FAILED";

            pt.stop();

            printf("w = %4d, h = %4d, nBands = %2d, dt = %2d, time = %4d ms, %s : %s\n",
                w, h, nBands, (int)dt, pt.ms(), resultMsg.c_str(), fnVec[n].c_str());
        }
    }

#endif

    printf("\npress ENTER\n");
    getchar();
    return 0;
}
int main(int argc, char* argv[])
{
#ifdef BENCHMARKING
    benchmark(argc, argv);
#else
    // mpi setup
    int numProcs;
    int rank, flag;
    int done = 0;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // create a buffer for both worker and controller
    static double buffer[BUFFER_SIZE];

    unsigned int niter = argc > 1 ? atoi(argv[1]) : NITER;

    // Setting up the PSF (statically)
    int psfWidth, psfHeight;
    double* psf = ImageQueue::getPsf(&psfWidth, &psfHeight);

    // ---------- CONTROLLER NODE ---------- //
    if (rank == 0)
    {
        // Set up producer
        ImageQueue images(buffer, BUFFER_SIZE, "../images", numProcs);

        // Print out some details
        int numImages = images.remaining();
        FPRINT("Starting %d iteration(s) on %d image(s)", niter, numImages);
        PerfTimer mainTimer;
        mainTimer.begin();

        int toSend = (unsigned int)numProcs < images.remaining() ? numProcs : images.remaining();
        for (int i = 0; i < toSend; i++)
        {
            images.pop(i);
            MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD);
        }

        while (images.remaining() > 0)
        {
            for (int i = 0; i < numProcs; i++)
            {
                // If an image is received then save it and send the next one
                MPI_Iprobe(i, IMG, MPI_COMM_WORLD, &flag, &status);
                if (flag)
                {
                    MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD, &status);
                    images.save(i);
                    images.pop(i);
                    MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD);
                }
            }
        }

        for (int i = 0; i < numProcs; i++)
        {
            MPI_Send(&done, 1, MPI_INT, i, END, MPI_COMM_WORLD);
        }

        FPRINT("Finished %d image(s) in %f seconds", numImages, mainTimer.getElapsed());
    }
    // ---------- WORKER NODE ---------- //
    else
    {
        // worker thread
        // Set up consumer
        DeconvFilter filter(WIDTH, HEIGHT, niter, psf, psfWidth, psfHeight, buffer);
        bool running = true;
        PRINT("Worker thread initialised.");

        while (running)
        {
            MPI_Iprobe(0, IMG, MPI_COMM_WORLD, &flag, &status);
            if (flag)
            {
                // New image
                MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD, &status);
                filter.process();
                MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD);
            }

            MPI_Iprobe(0, END, MPI_COMM_WORLD, &flag, &status);
            if (flag)
            {
                // Execution finished
                MPI_Recv(&done, 1, MPI_INT, 0, END, MPI_COMM_WORLD, &status);
                running = false;
            }
        }
        PRINT("Worker thread finished.");
    }

    MPI_Finalize();
#endif
    return 0;
}