예제 #1
0
void TestVectorOfObjects::run(size_t count, size_t updates)
{
	PerfTimer perf;

	perf.start();
	std::vector<Particle> particles(count);
	perf.stop(&_creationTime);

	// randomize: no sense in this case...
	/*for (size_t i = 0; i < count / 2; ++i)
	{
		int a = rand() % count;
		int b = rand() % count;
		std::swap(particles[a], particles[b]);
	}*/

	_memoryKb = (particles.capacity()*sizeof(Particle)) / 1024.0;

	for (auto p = particles.begin(); p != particles.end(); ++p)
		p->generate();

	perf.start();
	for (size_t u = 0; u < updates; ++u)
	{
		for (auto p = particles.begin(); p != particles.end(); ++p)
			p->update(DELTA_TIME);
	}
	perf.stop(&_updatesTime);
}
예제 #2
0
void TestVectorOfPointers::run(size_t count, size_t updates)
{
	PerfTimer perf;

	perf.start();
	std::vector<std::shared_ptr<Particle>> particles(count);
	for (auto p = particles.begin(); p != particles.end(); ++p)
	{
		*p = std::make_shared<Particle>();
	}

	perf.stop(&_creationTime);

	// randomize to simulate 
	for (size_t i = 0; i < count / 2; ++i)
	{
		int a = rand() % count;
		int b = rand() % count;
		if (a != b)
			std::swap(particles[a], particles[b]);
	}

	/*for (int i = 0; i < 10; ++i)
	{
		std::cout << (unsigned long)particles[i].get() << std::endl;
	}*/

	_memoryKb = (particles.capacity()*sizeof(Particle)) / 1024.0;

	for (auto p = particles.begin(); p != particles.end(); ++p)
		(*p)->generate();

	perf.start();
	for (size_t u = 0; u < updates; ++u)
	{
		for (auto p = particles.begin(); p != particles.end(); ++p)
			(*p)->update(DELTA_TIME);
	}
	perf.stop(&_updatesTime);
}
예제 #3
0
// loading/unloading
// ----------------------------------------------------------------------------------------------
// Loads 'filename' into 'res' using the factory registered for that file.
// Returns true when the factory reports success (the resource is then marked
// ready); returns false when the file does not exist, no factory is found,
// or the factory fails. The elapsed load time is logged in both outcomes.
bool ResourceManager::LoadResource(Resource * res,  const WCHAR* filename)
{
    // Reject files that are not on disk before doing any work.
    if (!FileUtils::Exists(filename))
    {
        Logger::Log(OutputMessageType::Error, L"Failed to load file, '%ls' -- does not exist\n", filename);
        return false;
    }

    PerfTimer timer;
    timer.Start();

    // Without a factory for this file there is nothing that can load it.
    ResourceFactory * factory = GetFactory(filename);
    if (!factory)
        return false;

    const bool loaded = factory->LoadResource(res, filename);
    if (loaded)
    {
        // Mark the resource ready before the timer is stopped.
        res->SetReady();
    }
    timer.Stop();

    if (!loaded)
    {
        Logger::Log(OutputMessageType::Error, L"%d ms failed to load %ls\n", timer.ElapsedMilliseconds(), filename);
        return false;
    }

    Logger::Log(OutputMessageType::Debug, L"%d ms Loaded %ls\n", timer.ElapsedMilliseconds(), FileUtils::Name(filename));
    return true;
}
// Reads the kernel source file "Electrostatics.cl.c", prepends a block of
// #defines computed on the host (work-group sizes, step count, precision and
// vector types), builds the program and creates the "CalcField_curvature"
// kernel for the functor selected by 'deviceID'.
// Returns CL_SUCCESS on success and -1 if the source file cannot be read;
// build and kernel-creation errors are handled through CL_ASSERTE.
CLerror CLElectrosFunctor<T>::LoadKernels ( size_t deviceID )
{
    PerfTimer timer;
    timer.start();
    FunctorData &data = m_functors[deviceID];

    cout<<" Reading kernel source"<<endl;
    using std::ifstream;
    ifstream reader("Electrostatics.cl.c", ifstream::in);
    if (!reader.good())
    {
        cout<<"Cannot open program source"<<endl;
        return -1;
    }
    reader.seekg (0, std::ios::end);
    // tellg() reports -1 on failure; converting that straight to size_t would
    // request an enormous allocation.
    const std::streamoff fileLength = reader.tellg();
    if (fileLength < 0)
    {
        cout<<"Cannot determine program source size"<<endl;
        return -1;
    }
    const size_t length = static_cast<size_t>(fileLength);
    reader.seekg (0, std::ios::beg);
    // One extra byte for the terminator: the source is handed to
    // clCreateProgramWithSource with lengths == NULL, which requires every
    // string to be NUL-terminated.
    char *source = new char[length + 1];
    reader.read(source, length);
    // Terminate after the characters actually read (this can be fewer than
    // 'length', e.g. when the stream translates line endings).
    source[reader.gcount()] = '\0';
    reader.close();

    /*
     * Different devices require different work group sizes to operate
     * optimally. The amount of __local memory on some kernels depends on these
     * work-group sizes. This causes a problem as explained below:
     * There are two ways to use group-local memory
     * 1) Allocate it as a parameter with clSetKernelArg()
     * 2) Declare it as a constant __local array within the cl kernel
     * Option (1) has the advantage of flexibility, but the extra indexing
     * overhead is a performance killer (20-25% easily lost on nvidia GPUs)
     * Option (2) has the advantage that the compiler knows the arrays are of
     * constant size, and is free to do extreme optimizations.
     * Of course, then both host and kernel have to agree on the size of the
     * work group.
     * We abuse the fact that the source code is compiled at runtime, decide
     * those sizes in the host code, then #define them in the kernel code,
     * before it is compiled.
     */

    // BLOCK size
    data.local = {BLOCK_X, 1, 1};
    size_t local_MT[3] = {BLOCK_X_MT, BLOCK_Y_MT, 1};
    // GRID size: number of lines rounded up to a multiple of BLOCK_X
    data.global = {((this->m_nLines + BLOCK_X - 1)/BLOCK_X)
                   * BLOCK_X, 1, 1
                  };
    // Both sizes are divided by the vector width of this functor.
    data.global[0] /= data.vecWidth;
    data.local[0] /= data.vecWidth;
    cout<<"Local   : "<<data.local[0]<<" "<<data.local[1]<<" "
        <<data.local[2]<<endl;
    cout<<"Local_MT: "<<local_MT[0]<<" "<<local_MT[1]<<" "<<local_MT[2]<<endl;
    cout<<"Global  : "<<data.global[0]<<" "<<data.global[1]<<" "
        <<data.global[2]<<endl;

    // Host-decided constants that are compiled into the kernel.
    char defines[1024];
    const size_t kernelSteps = this->m_pFieldLinesData->GetSize()
                               / this->m_nLines;
    snprintf(defines, sizeof(defines),
             "#define BLOCK_X %u\n"
             "#define BLOCK_X_MT %u\n"
             "#define BLOCK_Y_MT %u\n"
             "#define KERNEL_STEPS %u\n"
             "#define Tprec %s\n"
             "#define Tvec %s\n",
             (unsigned int) data.local[0],
             (unsigned int) local_MT[0], (unsigned int)local_MT[1],
             (unsigned int) kernelSteps,
             FindPrecType(),
             FindVecType(data.vecWidth)
            );

    cout<<" Calc'ed kern steps "<<kernelSteps<<endl;
    // The defines come first so that they are visible to the kernel source.
    char *srcs[2] = {defines, source};
    CLerror err;
    cl_program prog = clCreateProgramWithSource(data.context, 2,
                      (const char**) srcs,
                      NULL, &err);
    if (err)cout<<"clCreateProgramWithSource returns: "<<err<<endl;
    // The program object holds its own copy of the source text, so the host
    // buffer can be released here (it was previously leaked).
    delete[] source;
    source = NULL;

    char options[] = "-cl-fast-relaxed-math";
    err = clBuildProgram(prog, 0, NULL, options, NULL, NULL);
    if (err)cout<<"clBuildProgram returns: "<<err<<endl;

    // Fetch and print the build log; it is printed even when the build
    // succeeded.
    size_t logSize = 0;
    clGetProgramBuildInfo(prog, data.device->deviceID,
                          CL_PROGRAM_BUILD_LOG,
                          0, NULL, &logSize);
    // One spare byte guarantees termination even if the query fails or
    // reports a zero-sized log.
    char * log = (char*)malloc(logSize + 1);
    if (log)
    {
        log[0] = '\0';
        clGetProgramBuildInfo(prog, data.device->deviceID,
                              CL_PROGRAM_BUILD_LOG,
                              logSize, log, 0);
        log[logSize] = '\0';
        cout<<"Program Build Log:"<<endl<<log<<endl;
        // Freed before CL_ASSERTE so that no path after this point leaks it.
        free(log);
    }
    CL_ASSERTE(err, "clBuildProgram failed");
    data.perfData.add(TimingInfo("Program compilation", timer.tick()));



    //==========================================================================
    cout<<" Preparing kernel"<<endl;
    // NOTE(review): 'prog' is never released in this function — confirm
    // whether the program handle is meant to outlive it.
    data.kernel = clCreateKernel(prog, "CalcField_curvature", &err);
    CL_ASSERTE(err, "clCreateKernel");
    return CL_SUCCESS;
}
// Runs the field-line kernel for one functor on one device: sets the kernel
// arguments, creates a command queue, uploads the functor's slice of the
// field-line arrays and the point charges, launches the kernel, then reads
// the slice back into the same host range. Transfer and kernel timings are
// recorded in the device's perfPacket. Returns CL_SUCCESS; OpenCL errors are
// handled through CL_ASSERTE.
unsigned long CLElectrosFunctor<T>::MainFunctor (
    size_t functorIndex,    ///< Functor whose data to process
    size_t deviceIndex      ///< Device on which to process data
)
{
    if(functorIndex != deviceIndex)
        cerr<<"WARNING: Different functor and device"<<endl;
    PerfTimer timer;
    FunctorData &funData = m_functors[functorIndex];
    FunctorData &devData = m_functors[deviceIndex];
    perfPacket &profiler = devData.perfData;
    timer.start();
    CLerror err;
    cl_context ctx = devData.context;

    cout<<" Preparing buffers"<<endl;
    Vector3<cl_mem> &arrdata = devData.devFieldMem;
    cl_mem &charges = devData.chargeMem;
    cl_kernel &kernel = devData.kernel;

    // Errors from the individual calls are OR-ed together and reported once.
    err = CL_SUCCESS;
    // __global float *x,
    err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &arrdata.x);
    // __global float *y,
    err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &arrdata.y);
    // __global float *z,
    err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &arrdata.z);
    // __global pointCharge *Charges,
    err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &charges);
    // const unsigned int linePitch,
    cl_uint param = this->m_nLines;
    err |= clSetKernelArg(kernel, 4, sizeof(param), &param);
    // const unsigned int p,
    param = (cl_uint)this->m_pPointChargeData->GetSize();
    err |= clSetKernelArg(kernel, 5, sizeof(param), &param);
    // const unsigned int fieldIndex,
    param = 1;
    err |= clSetKernelArg(kernel, 6, sizeof(param), &param);

    // const float resolution
    T res = this->m_resolution;
    err |= clSetKernelArg(kernel, 7, sizeof(res), &res);
    if (err)cout<<"clSetKernelArg cummulates: "<<err<<endl;

    //==========================================================================
    // NOTE(review): the queue created here is never released in this
    // function — confirm whether that is intentional.
    cl_command_queue queue = clCreateCommandQueue(ctx,
                             devData.device->deviceID,
                             0, &err);
    if (err)cout<<"clCreateCommandQueue returns: "<<err<<endl;

    timer.tick();
    Vector3<T*> hostArr = this->m_pFieldLinesData->GetDataPointers();
    // Host range of this functor: it begins at element 'start' and spans
    // 'size' bytes in each of the x/y/z arrays.
    const size_t start = funData.startIndex;
    const size_t size = funData.elements * sizeof(T) * funData.steps;

    err = CL_SUCCESS;
    err |= clEnqueueWriteBuffer(queue, arrdata.x, CL_FALSE, 0, size,
                                &hostArr.x[start], 0, NULL, NULL);
    if (err)cout<<"Write 1 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.y, CL_FALSE, 0, size,
                                &hostArr.y[start], 0, NULL, NULL);
    if (err)cout<<"Write 2 returns: "<<err<<endl;
    err |= clEnqueueWriteBuffer(queue, arrdata.z, CL_FALSE, 0, size,
                                &hostArr.z[start], 0, NULL, NULL);
    if (err)cout<<"Write 3 returns: "<<err<<endl;
    const size_t qSize = this->m_pPointChargeData->GetSizeBytes();
    err |= clEnqueueWriteBuffer(queue, charges, CL_FALSE, 0, qSize,
                                this->m_pPointChargeData->GetDataPointer(),
                                0, NULL, NULL);
    if (err)cout<<"Write 4 returns: "<<err<<endl;
    CL_ASSERTE(err, "Sending data to device failed");

    // Finish memory copies before starting the kernel
    CL_ASSERTE(clFinish(queue), "Pre-kernel sync");

    profiler.add(TimingInfo("Host to device transfer", timer.tick(),
                            3*size + qSize ));

    //==========================================================================

    cout<<" Executing kernel"<<endl;

    timer.tick();
    err |= clEnqueueNDRangeKernel(queue, kernel, 3, NULL,
                                  funData.global, funData.local,
                                  0, NULL, NULL);
    if (err)cout<<"clEnqueueNDRangeKernel returns: "<<err<<endl;
    // A failed launch leaves nothing worth timing or reading back.
    CL_ASSERTE(err, "Kernel launch failed");
    // Let kernel finish before continuing
    CL_ASSERTE(clFinish(queue), "Post-kernel sync");
    double time = timer.tick();
    this->m_pPerfData->time = time;
    // NOTE(review): the step count in this estimate is the literal 2500
    // rather than funData.steps — confirm it matches the data actually
    // processed.
    this->m_pPerfData->performance =
        ( this->m_nLines * ( ( 2500-1 ) * ( this->m_pPointChargeData->GetSize()
                                            * ( electroPartFieldFLOP + 3 ) + 13 ) ) / time ) / 1E9;
    profiler.add(TimingInfo("Kernel execution time", time));
    //==========================================================================
    cout<<" Recovering results"<<endl;

    timer.tick();
    err = CL_SUCCESS;
    // Read the results back into the same host range that was uploaded.
    // Reading into the base pointers would overwrite the first functor's
    // range whenever startIndex is non-zero.
    err |= clEnqueueReadBuffer ( queue, arrdata.x, CL_FALSE, 0, size,
                                 &hostArr.x[start], 0, NULL, NULL );
    if (err)cout<<" Read 1 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer ( queue, arrdata.y, CL_FALSE, 0, size,
                                 &hostArr.y[start], 0, NULL, NULL );
    if (err)cout<<" Read 2 returns: "<<err<<endl;
    err |= clEnqueueReadBuffer ( queue, arrdata.z, CL_FALSE, 0, size,
                                 &hostArr.z[start], 0, NULL, NULL );
    if (err)cout<<" Read 3 returns: "<<err<<endl;
    if (err)cout<<"clEnqueueReadBuffer cummulates: "<<err<<endl;
    // Checked the same way as the upload phase, instead of returning
    // CL_SUCCESS after a failed read-back.
    CL_ASSERTE(err, "Reading data from device failed");

    // The reads are non-blocking; wait for them before reporting the timing.
    CL_ASSERTE(clFinish(queue), "Post-read sync");

    profiler.add(TimingInfo("Device to host transfer", timer.tick(),
                            3 * size));
    return CL_SUCCESS;
}
예제 #6
0
// Builds a configuration-space trajectory that follows the workspace
// sequence [wbegin, wend), starting from 'cbegin'. One state per workspace
// target (plus the initial state) is inserted into 'trajectory' at 'iter';
// each state is solved by the kinematics solver within a joint range centred
// on the previous state. 'timeOut' is split evenly between the segments
// unless it is MSEC_TM_U32_INF.
// Returns false if the sequence is empty or any segment cannot be solved.
bool RagGraphPlanner::findLocalTrajectory(const Controller::State &cbegin, GenWorkspaceChainState::Seq::const_iterator wbegin, GenWorkspaceChainState::Seq::const_iterator wend, Controller::Trajectory &trajectory, Controller::Trajectory::iterator iter, MSecTmU32 timeOut) {
	CriticalSectionWrapper csw(csCommand);

#ifdef _HBGRAPHPLANNER_PERFMON
	PerfTimer t;
#ifdef _HBHEURISTIC_PERFMON
	HBHeuristic::resetLog();
	HBCollision::resetLog();
#endif
#endif
	// Hand planning is enabled for the duration of this call whenever a
	// heuristic is available; every exit path below switches it off again.
	HBHeuristic *heuristic = getHBHeuristic();
	if (heuristic/* && heuristic->enableUnc*/)
		enableHandPlanning();

//	context.write("findLocalTrajectory(): %s\n", hbplannerDebug(*this).c_str());
	//context.write("RagGraphPlanner::findLocalTrajectory: %s\n", hbplannerConfigspaceDebug(*this).c_str());
	//context.write("RagGraphPlanner::findLocalTrajectory: %s\n", hbplannerWorkspaceDebug(*this).c_str());

	// trajectory size: the initial state plus one state per workspace target
	const size_t size = 1 + (size_t)(wend - wbegin);
	// check initial size
	if (size < 2) {
		context.error("GraphPlanner::findLocalTrajectory(): Invalid workspace sequence size\n");
		// do not leave hand planning enabled on failure
		if (heuristic)
			disableHandPlanning();
		return false;
	}
	// time out per segment
	const MSecTmU32 segTimeOut = timeOut == MSEC_TM_U32_INF ? MSEC_TM_U32_INF : timeOut / MSecTmU32(size - 1);
	// fill trajectory with cbegin
	const Controller::State cinit = cbegin; // backup, taken before the trajectory is modified
	Controller::Trajectory::iterator end = ++trajectory.insert(iter, cinit);
	for (GenWorkspaceChainState::Seq::const_iterator i = wbegin; i != wend; ++i)
		end = ++trajectory.insert(end, cinit);
	// first of the 'size' states just inserted
	Controller::Trajectory::iterator begin = end - size;

	getCallbackDataSync()->syncCollisionBounds();
	optimisedPath.resize(size - 1);
	population.assign(1, cbegin.cpos); // always keep initial solution in case no transformation is required
	pKinematics->setPopulation(&population);
	pKinematics->setDistRootFac(pKinematics->getDesc().distRootLocalFac);

	// find configspace trajectory
	PARAMETER_GUARD(Heuristic, GenCoordConfigspace, Min, *pHeuristic);
	PARAMETER_GUARD(Heuristic, GenCoordConfigspace, Max, *pHeuristic);
	for (size_t i = 1; i < size; ++i) {
		// pointers: c[0] is the previous (already solved) state, c[1] the one to solve
		const Controller::Trajectory::iterator c[2] = { begin + i - 1, begin + i };
		const GenWorkspaceChainState::Seq::const_iterator w = wbegin + i - 1;

		// setup search limits: start from the current limits and narrow the
		// joint positions to a window around the previous state.
		GenCoordConfigspace min = pHeuristic->getMin();
		// The upper limit must start from getMax(); seeding it from getMin()
		// made every field not overwritten below equal to the lower limit.
		GenCoordConfigspace max = pHeuristic->getMax();
		for (Configspace::Index j = stateInfo.getJoints().begin(); j < stateInfo.getJoints().end(); ++j) {
			const idx_t k = j - stateInfo.getJoints().begin();
			min[j].pos = c[0]->cpos[j] - localFinderDesc.range[k];
			max[j].pos = c[0]->cpos[j] + localFinderDesc.range[k];
		}
		pHeuristic->setMin(min);
		pHeuristic->setMax(max);

		// and search for a solution
		if (!pKinematics->findGoal(*c[0], *w, *c[1], segTimeOut)) {
			context.error("GraphPlanner::findLocalTrajectory(): unable to solve inverse kinematics\n");
			// do not leave hand planning enabled on failure
			if (heuristic)
				disableHandPlanning();
			return false;
		}

		// visualisation
		optimisedPath[i - 1].cpos = c[1]->cpos;
		optimisedPath[i - 1].wpos = w->wpos;
	}

	// profile configspace trajectory
	pProfile->profile(trajectory, begin, end);

	getCallbackDataSync()->syncFindTrajectory(begin, end, &*(wend - 1));


#ifdef _HBGRAPHPLANNER_PERFMON
	// 'size' is a size_t; cast it so that it matches the %d conversion.
	context.write("GraphPlanner::findLocalTrajectory(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), static_cast<int>(size));
#ifdef _HBHEURISTIC_PERFMON
	if (heuristic) {
		context.write("Enabled Uncertainty %s\n", heuristic->enableUnc ? "ON" : "OFF");
		//heuristic->writeLog(context, "GraphPlanner::findTarget()");
		heuristic->getCollision()->writeLog(context, "GraphPlanner::findTarget()");;
	}
#endif
#endif

	if (heuristic/* && heuristic->enableUnc*/)
		disableHandPlanning();

	return true;
}
예제 #7
0
// Plans a trajectory from 'begin' to 'end' with hand planning disabled:
// generates the global graph, finds a node path on it, optionally refines
// that path with the local path finder (up to pathFinderDesc.numOfTrials
// retries), optimises the result and finally creates and profiles the
// trajectory states, inserting them into 'trajectory' at 'iter'.
// 'wend' is only forwarded to the trajectory callback.
// Returns false when no global or local path can be found.
bool RagGraphPlanner::findGlobalTrajectory(const Controller::State &begin, const Controller::State &end, Controller::Trajectory &trajectory, Controller::Trajectory::iterator iter, const GenWorkspaceChainState* wend) {
	CriticalSectionWrapper csw(csCommand);

#ifdef _HBGRAPHPLANNER_PERFMON
	PerfTimer t;
#ifdef _HBHEURISTIC_PERFMON
	HBHeuristic::resetLog();
	HBCollision::resetLog();
#endif
#ifdef _BOUNDS_PERFMON
	Bounds::resetLog();
#endif
#endif

	getCallbackDataSync()->syncCollisionBounds();

#ifdef _HBGRAPHPLANNER_PERFMON
	t.reset();
#endif
	// generate global graph only for the arm
	// getHBHeuristic() is checked for null by the sibling planner methods,
	// so it must not be dereferenced unconditionally here.
	const HBHeuristic *heuristic = getHBHeuristic();
	context.debug("GraphPlanner::findGlobalTrajectory(): Enabled Uncertainty %s. disable hand planning...\n", (heuristic && heuristic->enableUnc) ? "ON" : "OFF");
	disableHandPlanning();
//	context.write("findGlobalTrajectory(): %s\n", hbplannerDebug(*this).c_str());
	//context.write("GraphPlanner::findGlobalTrajectory(): %s\n", hbplannerConfigspaceDebug(*this).c_str());
	//context.write("GraphPlanner::findGlobalTrajectory(): %s\n", hbplannerWorkspaceDebug(*this).c_str());

	// generate global graph
	pGlobalPathFinder->generateOnlineGraph(begin.cpos, end.cpos);
	// find node path on global graph
	globalPath.clear();
	if (!pGlobalPathFinder->findPath(end.cpos, globalPath, globalPath.begin())) {
		context.error("GlobalPathFinder::findPath(): unable to find global path\n");
		return false;
	}
#ifdef _HBGRAPHPLANNER_PERFMON
	// container sizes are size_t; cast them to match the %d conversion
	context.write(
		"GlobalPathFinder::findPath(): time_elapsed = %f [sec], len = %d\n",
		t.elapsed(), static_cast<int>(globalPath.size())
		);
#endif

	if (pLocalPathFinder != NULL) {
#ifdef _HBGRAPHPLANNER_PERFMON
		t.reset();
#endif
		PARAMETER_GUARD(Heuristic, Real, Scale, *pHeuristic);

		// Each attempt starts again from the global path; give up once the
		// number of failed attempts exceeds numOfTrials.
		for (U32 i = 0;;) {
			localPath = globalPath;
			if (localFind(begin.cpos, end.cpos, localPath))
				break;
			else if (++i > pathFinderDesc.numOfTrials) {
				context.error("LocalPathFinder::findPath(): unable to find local path\n");
				return false;
			}
		}
#ifdef _HBGRAPHPLANNER_PERFMON
		context.write(
			"LocalPathFinder::findPath(): time_elapsed = %f [sec], len = %d\n",
			t.elapsed(), static_cast<int>(localPath.size())
			);
#endif
		// copy localPath
		optimisedPath = localPath;
	}
	else {
		// copy globalPath
		optimisedPath = globalPath;
	}

#ifdef _HBGRAPHPLANNER_PERFMON
	// report the path-finding statistics, then clear them so that the
	// optimisation stage is measured on its own
#ifdef _HBHEURISTIC_PERFMON
//	context.debug("Enabled Uncertainty %s\n", getHBHeuristic()->enableUnc ? "ON" : "OFF");
	//HBHeuristic::writeLog(context, "PathFinder::find()");
	HBCollision::writeLog(context, "PathFinder::find()");
#endif
#ifdef _BOUNDS_PERFMON
	Bounds::writeLog(context, "PathFinder::find()");
#endif
#ifdef _HBHEURISTIC_PERFMON
	HBHeuristic::resetLog();
	HBCollision::resetLog();
#endif
#ifdef _BOUNDS_PERFMON
	Bounds::resetLog();
#endif
	t.reset();
#endif
	optimize(optimisedPath, begin.cacc, end.cacc);
#ifdef _HBGRAPHPLANNER_PERFMON
	context.write(
		"GraphPlanner::optimize(): time_elapsed = %f [sec], len = %d\n", t.elapsed(), static_cast<int>(optimisedPath.size())
		);
#ifdef _HBHEURISTIC_PERFMON
	HBHeuristic::writeLog(context, "GraphPlanner::optimize()");
	HBCollision::writeLog(context, "GraphPlanner::optimize()");
#endif
#ifdef _BOUNDS_PERFMON
	Bounds::writeLog(context, "GraphPlanner::optimize()");
#endif
	t.reset();
#endif
	// create the trajectory states from the optimised path and profile them
	Controller::Trajectory::iterator iend = iter;
	pProfile->create(optimisedPath.begin(), optimisedPath.end(), begin, end, trajectory, iter, iend);
	pProfile->profile(trajectory, iter, iend);
#ifdef _HBGRAPHPLANNER_PERFMON
	context.write(
		"GraphPlanner::profile(): time_elapsed = %f [sec], len = %d\n",
		t.elapsed(), static_cast<int>(trajectory.size())
		);
#endif

	// NOTE(review): the callback receives the whole trajectory rather than
	// [iter, iend) — confirm this is intended.
	getCallbackDataSync()->syncFindTrajectory(trajectory.begin(), trajectory.end(), wend);

	return true;
}
예제 #8
0
// Finds a configuration-space goal 'cend' for the workspace target 'wend',
// starting from 'begin'. The heuristic's uncertainty flag is switched off
// for the duration of the search and restored on every exit path; hand
// planning is disabled and left disabled. The kinematics solver is seeded
// with the lowest-cost waypoints of a freshly generated graph.
// On success 'cend' receives the solution with the target time and zero
// velocity and acceleration. Returns false if no goal state is found.
bool RagGraphPlanner::findTarget(const GenConfigspaceState &begin, const GenWorkspaceChainState& wend, GenConfigspaceState &cend) {
	CriticalSectionWrapper csw(csCommand);

	// Remember the uncertainty flag in the member 'enableUnc' and turn it
	// off for this search.
	HBHeuristic *heuristic = getHBHeuristic();
	if (heuristic) {
		enableUnc = heuristic->enableUnc;
		heuristic->enableUnc = false;
		context.debug("RagGraphPlanner::findTarget(): enable unc %s\n", heuristic->enableUnc ? "ON" : "OFF");
	}
	// TODO: Find why the pre-grasp pose returns with close fingers
	disableHandPlanning();
//	context.debug("findTarget: %s\n", hbplannerDebug(*this).c_str());
	//context.write("RagGraphPlanner::findTarget: %s\n", hbplannerConfigspaceDebug(*this).c_str());
	//context.write("RagGraphPlanner::findTarget: %s\n", hbplannerWorkspaceDebug(*this).c_str());

#ifdef _HBHEURISTIC_PERFMON
	// 'heuristic' may be null (see the check above)
	if (heuristic) {
		heuristic->resetLog();
		heuristic->getCollision()->resetLog();
	}
#endif
#ifdef _HBGRAPHPLANNER_PERFMON
	PerfTimer t;
#endif

	getCallbackDataSync()->syncCollisionBounds();

	// generate graph
	pGlobalPathFinder->generateOnlineGraph(begin.cpos, wend.wpos);

	// create waypoints pointers
	const Waypoint::Seq& graph = pGlobalPathFinder->getGraph();
	WaypointPtr::Seq waypointPtrGraph;
	waypointPtrGraph.reserve(graph.size());
	for (U32 i = 0; i < graph.size(); ++i)
		waypointPtrGraph.push_back(WaypointPtr(&graph[i]));

	// Create waypoint population: never more entries than there are waypoints
	const U32 populationSize = std::min(U32(pKinematics->getDesc().populationSize), (U32)waypointPtrGraph.size());

	// sort waypoints (pointers) from the lowest to the highest cost;
	// only the first populationSize entries need to be ordered
	std::partial_sort(waypointPtrGraph.begin(), waypointPtrGraph.begin() + populationSize, waypointPtrGraph.end(), WaypointPtr::cost_less());

	// create initial population for kinematics solver
	// NOTE(review): a pointer to this local is handed to setPopulation() —
	// confirm the solver does not keep it beyond this call.
	ConfigspaceCoord::Seq population;
	population.reserve(populationSize);
	for (WaypointPtr::Seq::const_iterator i = waypointPtrGraph.begin(); population.size() < populationSize && i != waypointPtrGraph.end(); ++i)
		population.push_back((*i)->cpos);
	pKinematics->setPopulation(&population);

	// set global root distance factor
	pKinematics->setDistRootFac(pKinematics->getDesc().distRootGlobalFac);

	// the search starts from the graph's root node
	GenConfigspaceState root;
	root.setToDefault(controller.getStateInfo().getJoints().begin(), controller.getStateInfo().getJoints().end());
	root.cpos = graph[Node::IDX_ROOT].cpos;

	// find the goal state
	if (!pKinematics->findGoal(root, wend, cend)) {
		context.error("GraphPlanner::findTarget(): unable to find target\n");
		// restore the uncertainty flag on failure as well; otherwise it
		// would stay switched off after an unsuccessful search
		if (heuristic)
			heuristic->enableUnc = enableUnc;
		return false;
	}

	cend.t = wend.t;
	cend.cvel.fill(REAL_ZERO);
	cend.cacc.fill(REAL_ZERO);

#ifdef _HBGRAPHPLANNER_PERFMON
	context.write(
		"GraphPlanner::findTarget(): time_elapsed = %f [sec]\n", t.elapsed()
		);
#endif
#ifdef _HBHEURISTIC_PERFMON
	if (heuristic) {
		heuristic->writeLog(context, "GraphPlanner::findTarget()");
		heuristic->getCollision()->writeLog(context, "GraphPlanner::findTarget()");
	}
#endif

	// restore the uncertainty flag saved at the top
	if (heuristic)
		heuristic->enableUnc = enableUnc;

	//enableHandPlanning();

//	context.write("RagGraphPlanner::findTarget(): done.\n");
	return true;
}
예제 #9
0
// RESP parser micro-benchmark. Builds an input made of alternating integer
// (":1234567") and simple-string ("+3.14159") messages, parses it N_TESTS
// times while validating every value, and prints the fastest run. When the
// single command-line argument is "graphite", the result (scaled by 1000)
// is also pushed as the "respstream" metric.
// Returns 0 on success and -1 as soon as a message is parsed incorrectly.
int main(int argc, char *argv[]) {
    if (argc == 2) {
        push_to_graphite = std::string(argv[1]) == "graphite";
    }
    // One pattern holds two messages, so TEST_ITERATIONS/2 repetitions yield
    // TEST_ITERATIONS messages.
    const char* pattern = ":1234567\r\n+3.14159\r\n";
    std::string input;
    for (int i = 0; i < TEST_ITERATIONS/2; i++) {
        input += pattern;
    }
    std::vector<double> timedeltas;
    uint64_t intvalue;
    Byte buffer[RESPStream::STRING_LENGTH_MAX];
    for (int i = N_TESTS; i --> 0;) {
        PerfTimer tm;
        MemStreamReader stream(input.data(), input.size());
        RESPStream protocol(&stream);
        for (int j = TEST_ITERATIONS; j --> 0;) {
            auto type = protocol.next_type();
            switch(type) {
            case RESPStream::INTEGER:
                intvalue = protocol.read_int();
                if (intvalue != 1234567) {
                    std::cerr << "Bad int value at " << j << std::endl;
                    return -1;
                }
                break;
            case RESPStream::STRING: {
                    int len = protocol.read_string(buffer, sizeof(buffer));
                    // "3.14159" is seven characters long
                    if (len != 7) {
                        std::cerr << "Bad string value at " << j << std::endl;
                        return -1;
                    }
                    char *p = buffer;
                    double res = strtod(buffer, &p);
                    // Compare explicitly instead of calling unqualified
                    // abs(): that name can resolve to the integer
                    // abs(int), which truncates the deviation to 0 and
                    // makes this check pass for any value.
                    const double deviation = res - 3.14159;
                    if (deviation > 0.0001 || deviation < -0.0001) {
                        std::cerr << "Can't parse float at " << j << std::endl;
                        return -1;
                    }
                }
                break;
            case RESPStream::ARRAY:
            case RESPStream::BAD:
            case RESPStream::BULK_STR:
            case RESPStream::ERROR:
            default:
                // the input contains no other message types
                std::cerr << "Error at " << j << std::endl;
                return -1;
            };
        }
        timedeltas.push_back(tm.elapsed());
    }
    // report the best (smallest) time over all runs
    double min = std::numeric_limits<double>::max();
    for (auto t: timedeltas) {
        min = std::min(min, t);
    }
    std::cout << "Parsing " << TEST_ITERATIONS << " messages in " << min << " sec." << std::endl;
    if (push_to_graphite) {
        push_metric_to_graphite("respstream", 1000.0*min);
    }
    return 0;
}
예제 #10
0
// Compression test and benchmark. Generates N_PARAMS series of N_TIMESTAMPS
// points each, encodes them with CompressionUtil::encode_chunk, compresses
// the same three columns with zlib for comparison, prints size statistics,
// then decodes the chunk and reports the first mismatch, if any. When the
// single command-line argument is "benchmark", both encoders are also timed
// over NRUNS iterations.
// Returns 1 on an encoding error; zlib errors terminate via exit(status).
int main(int argc, char** argv) {
    const uint64_t N_TIMESTAMPS = 1000;
    const uint64_t N_PARAMS = 100;
    UncompressedChunk header;
    std::cout << "Testing timestamp sequence" << std::endl;
    int c = 100;
    std::vector<aku_ParamId> ids;
    for (uint64_t id = 0; id < N_PARAMS; id++) { ids.push_back(id); }
    RandomWalk rwalk(10.0, 0.0, 0.01, N_PARAMS);
    for (uint64_t id = 0; id < N_PARAMS; id++) {
        for (uint64_t ts = 0; ts < N_TIMESTAMPS; ts++) {
            header.paramids.push_back(ids[id]);
            // 'c' is a jitter term: it moves up or down by one at random
            // and never drops below zero.
            int k = rand() % 2;
            if (k) {
                c++;
            } else if (c > 0) {
                c--;
            }
            header.timestamps.push_back((ts + c) << 8);
            header.values.push_back(rwalk.generate(0));
        }
    }

    // Output buffer for the encoder: 24 bytes per point.
    ByteVector out;
    out.resize(N_PARAMS*N_TIMESTAMPS*24);

    const size_t UNCOMPRESSED_SIZE = header.paramids.size()*8    // Didn't count lengths and offsets
                                   + header.timestamps.size()*8  // because these arrays contain
                                   + header.values.size()*8;     // no information and should be compressed
                                                                 // to a few bytes

    // ChunkWriter that hands out the whole ByteVector and shrinks it to the
    // number of bytes written on commit.
    struct Writer : ChunkWriter {
        ByteVector *out;
        Writer(ByteVector *out) : out(out) {}

        virtual aku_MemRange allocate() {
            aku_MemRange range = {
                out->data(),
                static_cast<uint32_t>(out->size())
            };
            return range;
        }

        //! Commit changes
        virtual aku_Status commit(size_t bytes_written) {
            out->resize(bytes_written);
            return AKU_SUCCESS;
        }
    };
    Writer writer(&out);

    aku_Timestamp tsbegin, tsend;
    uint32_t n;
    auto status = CompressionUtil::encode_chunk(&n, &tsbegin, &tsend, &writer, header);
    if (status != AKU_SUCCESS) {
        std::cout << "Encoding error" << std::endl;
        return 1;
    }

    // Compress using zlib
    // (the buffers allocated below are never freed in this function)

    // Ids copy (zlib need all input data to be aligned because it uses SSE2 internally)
    Bytef* pgz_ids = (Bytef*)aligned_alloc(64, header.paramids.size()*8);
    memcpy(pgz_ids, header.paramids.data(), header.paramids.size()*8);
    // Timestamps copy
    Bytef* pgz_ts = (Bytef*)aligned_alloc(64, header.timestamps.size()*8);
    memcpy(pgz_ts, header.timestamps.data(), header.timestamps.size()*8);
    // Values copy
    Bytef* pgz_val = (Bytef*)aligned_alloc(64, header.values.size()*8);
    memcpy(pgz_val, header.values.data(), header.values.size()*8);

    const auto gz_max_size = N_PARAMS*N_TIMESTAMPS*24;
    Bytef* pgzout = (Bytef*)aligned_alloc(64, gz_max_size);
    // compress() takes the capacity of the output buffer in this variable
    // and replaces it with the compressed size, so it has to be reset to the
    // capacity before each call.
    uLongf gzoutlen = gz_max_size;
    size_t total_gz_size = 0, id_gz_size = 0, ts_gz_size = 0, float_gz_size = 0;
    // compress param ids
    auto zstatus = compress(pgzout, &gzoutlen, pgz_ids, header.paramids.size()*8);
    if (zstatus != Z_OK) {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    id_gz_size = gzoutlen;
    gzoutlen = gz_max_size;
    // compress timestamps
    zstatus = compress(pgzout, &gzoutlen, pgz_ts, header.timestamps.size()*8);
    if (zstatus != Z_OK) {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    ts_gz_size = gzoutlen;
    gzoutlen = gz_max_size;
    // compress floats
    zstatus = compress(pgzout, &gzoutlen, pgz_val, header.values.size()*8);
    if (zstatus != Z_OK) {
        std::cout << "GZip error" << std::endl;
        exit(zstatus);
    }
    total_gz_size += gzoutlen;
    float_gz_size = gzoutlen;

    const float GZ_BPE = float(total_gz_size)/header.paramids.size();
    const float GZ_RATIO = float(UNCOMPRESSED_SIZE)/float(total_gz_size);


    const size_t COMPRESSED_SIZE = out.size();
    const float BYTES_PER_EL = float(COMPRESSED_SIZE)/header.paramids.size();
    const float COMPRESSION_RATIO = float(UNCOMPRESSED_SIZE)/COMPRESSED_SIZE;

    std::cout << "Uncompressed: " << UNCOMPRESSED_SIZE       << std::endl
              << "  compressed: " << COMPRESSED_SIZE         << std::endl
              << "    elements: " << header.paramids.size()  << std::endl
              << "  bytes/elem: " << BYTES_PER_EL            << std::endl
              << "       ratio: " << COMPRESSION_RATIO       << std::endl
    ;

    std::cout << "Gzip stats: " << std::endl
              << "bytes/elem: " << GZ_BPE << std::endl
              << "     ratio: " << GZ_RATIO << std::endl
              << "  id bytes: " << id_gz_size << std::endl
              << "  ts bytes: " << ts_gz_size << std::endl
              << " val bytes: " << float_gz_size << std::endl;


    // Try to decompress and compare with the input; only the first mismatch
    // is reported.
    UncompressedChunk decomp;
    const unsigned char* pbegin = out.data();
    const unsigned char* pend = pbegin + out.size();
    CompressionUtil::decode_chunk(&decomp, pbegin, pend, header.timestamps.size());
    bool first_error = true;
    for (auto i = 0u; i < header.timestamps.size(); i++) {
        if (header.timestamps.at(i) != decomp.timestamps.at(i) && first_error) {
            std::cout << "Error, bad timestamp at " << i << std::endl;
            first_error = false;
        }
        if (header.paramids.at(i) != decomp.paramids.at(i) && first_error) {
            std::cout << "Error, bad paramid at " << i << std::endl;
            first_error = false;
        }
        double origvalue = header.values.at(i);
        double decvalue = decomp.values.at(i);
        if (origvalue != decvalue && first_error) {
            std::cout << "Error, bad value at " << i << std::endl;
            std::cout << "Expected: " << origvalue << std::endl;
            std::cout << "Actual:   " << decvalue << std::endl;
            first_error = false;
        }
    }

    if (argc == 2 && std::string(argv[1]) == "benchmark") {
        // Bench compression process
        const int NRUNS = 1000;
        PerfTimer tm;
        aku_Status tstatus;
        // volatile so that the encoder's result is observably used
        volatile uint32_t vn;
        ByteVector vec;
        for (int i = 0; i < NRUNS; i++) {
            // commit() shrinks the vector, so restore its size for every run
            vec.resize(N_PARAMS*N_TIMESTAMPS*24);
            Writer w(&vec);
            aku_Timestamp ts;
            uint32_t n;
            tstatus = CompressionUtil::encode_chunk(&n, &ts, &ts, &w, header);
            if (tstatus != AKU_SUCCESS) {
                std::cout << "Encoding error" << std::endl;
                return 1;
            }
            vn = n;
        }
        double elapsed = tm.elapsed();
        std::cout << "Elapsed (akumuli): " << elapsed << " " << vn << std::endl;

        tm.restart();
        for (int i = 0; i < NRUNS; i++) {
            uLongf offset = 0;
            // Start every iteration with the full buffer capacity. Without
            // this the first compress() inherits the compressed size left
            // over from the previous call as its output capacity.
            gzoutlen = gz_max_size;
            // compress param ids
            auto zstatus = compress(pgzout, &gzoutlen, pgz_ids, header.paramids.size()*8);
            if (zstatus != Z_OK) {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
            offset += gzoutlen;
            gzoutlen = gz_max_size - offset;
            // compress timestamps
            zstatus = compress(pgzout + offset, &gzoutlen, pgz_ts, header.timestamps.size()*8);
            if (zstatus != Z_OK) {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
            offset += gzoutlen;
            gzoutlen = gz_max_size - offset;
            // compress floats
            zstatus = compress(pgzout + offset, &gzoutlen, pgz_val, header.values.size()*8);
            if (zstatus != Z_OK) {
                std::cout << "GZip error" << std::endl;
                exit(zstatus);
            }
        }
        elapsed = tm.elapsed();
        std::cout << "Elapsed (zlib): " << elapsed << " " << vn << std::endl;
    }
}
예제 #11
0
파일: main.cpp 프로젝트: oakfr/omni3d
// this callback is called every second
void callback(void*) 
{	

	PerfTimer timer;
	
	initGraphics();

	_gl_main->redraw();

	//Fl::repeat_timeout(.5, callback);

	//return;

	if (!_gl_main->pause_video->value()) {
		Fl::repeat_timeout(_tempo, callback);
		return;
	}
	
	//refresh the control bar
	_wind->child(2)->redraw();
	
	// for performance reasons...	
	_gl_main->_camera->setActiveSensor(_gl_main->_calibrated_camera);
	
	// check database first
	if (_gl_main->_database == NULL) {
		// grab ladybug images
		if (_gl_main->_ladybug->_init) {
			_gl_main->_ladybug->grab(_gl_main->_camera->_frame._grab_img);
			_gl_main->_camera->_frame.processFrame();
			_tempo = 0.05;
		} else {
			_tempo = 1.0;
		}
	}

	// save frame if recording
	if ( _gl_main->_ladybug_recording ) {

		_tempo = .5;

		_gl_main->_ladybug->grab(_gl_main->_camera->_frame._grab_img); // grab an image on the ladybug
		_gl_main->_camera->_frame.processFrame();

		for (int sensorId = 0; sensorId < 6; sensorId++ ) {
			std::string filename = _gl_main->_database->_dirname + "//" + _gl_main->_database->getLadybugImageFilename( sensorId, _gl_main->frameId );

			cvSaveImage( filename.c_str(), _gl_main->_camera->_frame._tmp_img[sensorId] );
		}

		_gl_main->_ladybug_record_nimages++;
		_gl_main->frameId++;
		_gl_main->_database->_frameId = _gl_main->frameId;
		_gl_main->redraw();
	}

	// compute FAST features and update the tracker

	if ( _gl_main->view_flow->value() ) {
	}
		
	// refresh the main window
	if ((_gl_main->_windowMode == MODE_VIDEO) || (_gl_main->_windowMode == MODE_CALIBRATION))
		_gl_main->redraw();
	
	// refresh the main window
	if (_gl_main->_windowMode == MODE_SPHERE)
		_gl_main->redraw();
	
	_gl_control->_perf_time = timer.elapsed();
	//Fl::repeat_timeout(MAX(0.05,1.5*_gl_control->_perf_time), callback);
	Fl::repeat_timeout(_tempo, callback);
}
void	TIGERImport(
			const	ChainInfoMap&		chains,
			const	LandmarkInfoMap&	landmarks,
			const	PolygonInfoMap&		polygons,
			Pmwx&						outMap)
{
	// Our planar map MUST be empty!

	// First we go in and insert every segment from the TIGER database into our map.
	// We keep a table from coordinates into the map so we can avoid doing a search
	// when we have a segment that is already at least partially inserted.

	VertexIndex		vertices;
	EdgeIndex		edges;
	FaceIndex		faces;

	set<TLID>		badTLIDs;

	int	gSegs = 0, gBad = 0, gDupes = 0;
	set<TLID>	tlids;
#if DO_CHECKS
	map<string, TLID>	lines;
#endif

	for (ChainInfoMap::const_iterator chain = chains.begin(); chain != chains.end(); ++chain)
	{
		int i = LookupNetCFCC(chain->second.cfcc.c_str());

		vector<RawCoordPair>	pts = chain->second.shape;
		pts.insert(pts.begin(), chain->second.start);
		pts.insert(pts.end(), chain->second.end);

		if (tlids.find(chain->first) != tlids.end())
		{
//			printf("WARNING: about to dupe a TLID!\n");
			continue;
		}
		tlids.insert(chain->first);

		for (int n = 1; n < pts.size(); ++n)
		{
			++gSegs;

#if DO_CHECKS
			string	masterkey, k1 = RawCoordToKey(pts[n-1]), k2 = RawCoordToKey(pts[n]);
			if (k1 < k2)
				masterkey = k1 + k2;
			else
				masterkey = k2 + k1;

			map<string, TLID>::iterator tlidCheck = lines.find(masterkey);
			if (tlidCheck != lines.end())
			{
				printf("WARNING: already did this seg (by key), old TLID = %ul, new TLID = %ul!\n", tlidCheck->second, chain->first);
				printf("Sequence in question is: %s\n", tlidCheck->first.c_str());
				continue;
			}
			lines.insert(map<string, TLID>::value_type(masterkey, chain->first));
#endif

			try {

				Pmwx::Halfedge_handle he = InsertOneSegment(pts[n-1], pts[n], vertices, outMap);
				if (he != outMap.halfedges_end())
				{
					// InsertOneSegment always returns the dominant half-edge.  Tag it with
					// our road type, underpassing info, and our TLID.
					if (i != -1)
					{
						GISNetworkSegment_t nl;
						nl.type = kRoadCodes[i].network_type;
						he->mSegments.push_back(nl);
						he->mParams[gis_TIGER_IsUnderpassing] = kRoadCodes[i].underpassing;
					}
					he->mParams[gis_TIGER_TLID] = chain->first;
					he->twin()->mParams[gis_TIGER_TLID] = chain->first;
					edges[chain->first] = he;
				} else {
					printf("Got dupe seg, CFCC = %s, name = %s, tlid = %d\n", chain->second.cfcc.c_str(), chain->second.name.c_str(), chain->first);
					++gDupes;
				}

			} catch (...) {
				++gBad;
				printf("Got bad seg, CFCC = %s, name = %s, tlid = %d\n", chain->second.cfcc.c_str(), chain->second.name.c_str(), chain->first);
				badTLIDs.insert(chain->first);
			}

			if ((gSegs % 10000) == 0)
			{
				fprintf(stdout, ".");
				fflush(stdout);
			}
		}
	}
	std::cout << "\nTotal " << gSegs << " bad " << gBad << " dupes " << gDupes << "\n";

	double	elapsed;
	unsigned long	calls;
	double	ave;

	zeroV.GetStats(elapsed, calls);
	ave = elapsed / (double) calls;
	printf("In-face insertion: %f total, %d calls, %f average.\n", elapsed, calls, ave);

	oneV.GetStats(elapsed, calls);
	ave = elapsed / (double) calls;
	printf("One-V Insertion: %f total, %d calls, %f average.\n", elapsed, calls, ave);

	twoV.GetStats(elapsed, calls);
	ave = elapsed / (double) calls;
	printf("Two-V insertion: %f total, %d calls, %f average.\n", elapsed, calls, ave);


	// Now we go in and apply our polygon data.  We have set the dominant flag to be the halfedge
	// that goes in the same direction as the tiger database.  Since CGAL faces have CCW outer
	// boundaries, that means that the left hand poly of a TLID is adjacent to the dominant
	// halfedge.



	int	gPolys = 0, gMissingTLIDs = 0, gBadEdges = 0, gBadBackLink = 0, gDeadTLID = 0;

	for (PolygonInfoMap::const_iterator poly = polygons.begin(); poly != polygons.end(); ++poly)
	{
		if (poly->first == WORLD_POLY)
			continue;
		++gPolys;
		set<TLID>		ourTLIDs;
		for (DirectedTLIDVector::const_iterator t = poly->second.border.begin(); t != poly->second.border.end(); ++t)
			ourTLIDs.insert(t->first);
		vector<TLID>	ourBads;
		set_intersection(ourTLIDs.begin(), ourTLIDs.end(), badTLIDs.begin(), badTLIDs.end(),
						back_insert_iterator<vector<TLID> >(ourBads));
		if (!ourBads.empty())
		{
			printf("Skipped Polygon because one of its TLIDs is missing from the DB!\n");
			++gDeadTLID;
			continue;
		}

		EdgeIndex::iterator edgeIter = edges.find(*ourTLIDs.begin());
		ChainInfoMap::const_iterator tlidIter = chains.find(*ourTLIDs.begin());

		if (edgeIter != edges.end() && tlidIter != chains.end())
		{
			Pmwx::Face_handle		our_face = outMap.faces_end();
			Pmwx::Halfedge_handle	he = edgeIter->second;
			if (!he->mDominant) he = he->twin();
			if (!he->mDominant)
			{
				++gBadEdges;
				printf("WARNING: Halfedge with no dominance!!\n");
				continue;
			}

			if (poly->first == tlidIter->second.lpoly)
			{
				our_face = he->face();
			} else if (poly->first == tlidIter->second.rpoly)
			{
				our_face = he->twin()->face();
			} else  {
				printf("WARNING: TLID from poly not backlinked to our poly!\n");
				++gBadBackLink;
			}

			if (our_face != outMap.faces_end())
			{
				if (poly->second.water)
				{
					our_face->mLandClass = lc_GenericWater;
				}
				faces[poly->first] = our_face;
			}
		} else
			++gMissingTLIDs;
	}


	printf("Polygons: %d, missing TLIDs from indices: %d, edges with no dominance: %d, bad back links: %d, dead TLIDS: %d\n",
			gPolys, gMissingTLIDs, gBadEdges, gBadBackLink, gDeadTLID);





	int	gLand = 0, gNoID = 0, gNoLocAtAll = 0, gPtOnEdge = 0;
	for (LandmarkInfoMap::const_iterator landmark = landmarks.begin();
		landmark != landmarks.end(); ++landmark)
	{
		++gLand;
		int cfcc = LookupAreaCFCC(landmark->second.cfcc.c_str());
		if (cfcc != -1)
		{
			if (!landmark->second.cenid_polyid.empty())
			{
				FaceIndex::iterator theFace = faces.find(landmark->second.cenid_polyid);
				if (theFace != faces.end())
				{
					theFace->second->mLandClass = kAreaCodes[cfcc].land_class;
				} else {
					fprintf(stderr, "WARNING: Cenid/polyid not found.\n");
					++gNoID;
				}

			} else if (!landmark->second.location.first.empty()) {

				if (kAreaCodes[cfcc].allow_from_point)
				{
					try {
						Pmwx::Locate_type	lt;

						Pmwx::Halfedge_handle h =
							outMap.locate(RawCoordToCoord(landmark->second.location), lt);

						if (lt == Pmwx::EDGE || lt == Pmwx::FACE)
						{
							h->face()->mLandClass = kAreaCodes[cfcc].land_class;
						} else {
							++gPtOnEdge;
							fprintf(stderr, "WARNING: Pt land mark on vertex or out of map.\n");
						}
					} catch (...) {
						++gPtOnEdge;
					}
				} else {
					// TODO: Add pt object
				}
			} else {
				fprintf(stderr, "Warning: landmark without polygon or pt.\n");
				gNoLocAtAll++;
			}
		}
	}
	printf("Total landmarks = %d, total with unknown CENID/POLYID = %d, no Loc = %d, pt on edge = %d\n", gLand, gNoID, gNoLocAtAll, gPtOnEdge);
}
// Inserts the segment p1 -> p2 into ioMap and returns the half-edge running
// from p1 to p2, flagged as dominant.  'index' maps coordinate keys to the
// vertices already present in the map and is extended with any endpoint this
// call creates.  The insertion routine is chosen by how many endpoints are
// already indexed (none, one or both).  If the underlying insertion yields a
// default-constructed handle, ioMap.halfedges_end() is returned instead.
Pmwx::Halfedge_handle	InsertOneSegment(
							const RawCoordPair& p1,
							const RawCoordPair& p2,
							VertexIndex&		index,
							Pmwx&				ioMap)
{
	string	srcKey = RawCoordToKey(p1);
	Point_2	srcPt  = RawCoordToCoord(p1);
	string	dstKey = RawCoordToKey(p2);
	Point_2	dstPt  = RawCoordToCoord(p2);

	VertexIndex::iterator srcIt = index.find(srcKey);
	VertexIndex::iterator dstIt = index.find(dstKey);
	const bool srcKnown = (srcIt != index.end());
	const bool dstKnown = (dstIt != index.end());

	Pmwx::Halfedge_handle	edge = Pmwx::Halfedge_handle();

#if 0
	// Disabled sanity checks: neither endpoint may lie on an existing edge,
	// and an unindexed endpoint may not coincide with an existing vertex.
	Pmwx::Locate_type loc1, loc2;
	ioMap.locate(srcPt, loc1);
	ioMap.locate(dstPt, loc2);
	CGAL_precondition_msg(loc1 != Pmwx::EDGE, "Pt1 on an edge, will cause CHAOS");
	CGAL_precondition_msg(loc2 != Pmwx::EDGE, "Pt2 on an edge, will cause CHAOS");
	if (!srcKnown)
		CGAL_precondition_msg(loc1 != Pmwx::VERTEX, "Pt1 on an unindexed vertex, will cause CHAOS");
	if (!dstKnown)
		CGAL_precondition_msg(loc2 != Pmwx::VERTEX, "Pt2 on an unindexed vertex, will cause CHAOS");
#endif

	if (!srcKnown && !dstKnown)
	{
		// Neither endpoint is in the map yet: locate the face containing p1
		// and drop the segment into its interior.
		Pmwx::Locate_type where;
		zeroV.Start();
		Pmwx::Halfedge_handle nearby = ioMap.locate(srcPt, where);
		CGAL_precondition_msg(where == Pmwx::FACE || where == Pmwx::UNBOUNDED_FACE, "Inserting a segment in unknown territory but it's NOT on a face!!");
		Pmwx::Face_handle	host = (where == Pmwx::UNBOUNDED_FACE) ? ioMap.unbounded_face() : nearby->face();
		edge = ioMap.insert_in_face_interior(PM_Curve_2(srcPt, dstPt), host);
		zeroV.Stop();

		// Both endpoints are new vertices; remember them.
		if (edge != Pmwx::Halfedge_handle())
		{
			index[srcKey] = edge->source();
			index[dstKey] = edge->target();
		}
	}
	else if (!srcKnown)
	{
		// Only p2 is indexed: grow the segment out of p2's vertex towards p1.
		oneV.Start();
		edge = ioMap.Planar_map_2::insert_from_vertex(
				PM_Curve_2(dstIt->second->point(), srcPt),
				dstIt->second);
		oneV.Stop();

		if (edge != Pmwx::Halfedge_handle())
		{
			// p1 is the target of the new half-edge; store it, then flip the
			// edge because it currently runs from p2 to p1.
			index[srcKey] = edge->target();
			edge = edge->twin();
		}
	}
	else if (!dstKnown)
	{
		// Only p1 is indexed: grow the segment out of p1's vertex towards p2.
		oneV.Start();
		edge = ioMap.Planar_map_2::insert_from_vertex(
				PM_Curve_2(srcIt->second->point(), dstPt),
				srcIt->second);
		oneV.Stop();

		// p2 is the target of the new half-edge; store it.
		if (edge != Pmwx::Halfedge_handle())
			index[dstKey] = edge->target();
	}
	else
	{
		// Both endpoints are already vertices of the map.
		twoV.Start();
		edge = ioMap.Planar_map_2::insert_at_vertices(
				PM_Curve_2(srcIt->second->point(), dstIt->second->point()),
				srcIt->second,
				dstIt->second);
		twoV.Stop();
	}

	// Nothing was inserted: report it with the map's end sentinel.
	if (edge == Pmwx::Halfedge_handle())
		return ioMap.halfedges_end();

	// Whenever we create a half edge we have to pick dominance...this works.
	edge->mDominant = true;
	return edge;
}
예제 #14
0
//-------------------------------------------------------------------------------------------------------------
// UTILITY FUNCTIONS
//-------------------------------------------------------------------------------------------------------------
// Compiles (or loads from the on-disk cache) every stage listed in 'desc',
// creates the corresponding D3D11 shader objects on 'device', and then builds
// everything derived from shader reflection: the vertex input layout, the CPU
// and GPU constant buffers, and the texture / sampler / UAV binding tables.
//
// device - D3D11 device used to create shader stages, input layout and buffers.
// desc   - shader description; stages with an empty file name are skipped.
//
// Returns true on success.  Returns false if a stage fails to compile or load,
// or if the input layout or a constant buffer cannot be created.  All shader
// blobs acquired along the way are released on every exit path.
bool Shader::CompileShaders(ID3D11Device* device, const ShaderDesc& desc)
{
	constexpr const char * SHADER_BINARY_EXTENSION = ".bin";
	mDescriptor = desc;
	ShaderBlobs blobs;
	bool bPrinted = false;

	// Releases every blob acquired so far.  Invoked before each return so that
	// a failure half-way through does not leak the blobs of earlier stages.
	auto ReleaseBlobs = [&blobs]()
	{
		for (unsigned type = EShaderStage::VS; type < EShaderStage::COUNT; ++type)
		{
			if (blobs.of[type])
			{
				blobs.of[type]->Release();
				blobs.of[type] = nullptr;
			}
		}
	};

	// NOTE(review): this timer is started but its result is never read.
	PerfTimer timer;
	timer.Start();

	// COMPILE SHADER STAGES
	//----------------------------------------------------------------------------
	for (const ShaderStageDesc& stageDesc : desc.stages)
	{
		if (stageDesc.fileName.empty())
			continue;

		// stage.macros
		const std::string sourceFilePath = std::string(Renderer::sShaderRoot + stageDesc.fileName);

		const EShaderStage stage = GetShaderTypeFromSourceFilePath(sourceFilePath);

		// USE SHADER CACHE
		//
		// The cache file name carries a hash of the preprocessor definitions so
		// that different macro sets of the same source get separate binaries.
		const size_t ShaderHash = GeneratePreprocessorDefinitionsHash(stageDesc.macros);
		const std::string cacheFileName = stageDesc.macros.empty()
			? DirectoryUtil::GetFileNameFromPath(sourceFilePath) + SHADER_BINARY_EXTENSION
			: DirectoryUtil::GetFileNameFromPath(sourceFilePath) + "_" + std::to_string(ShaderHash) + SHADER_BINARY_EXTENSION;
		const std::string cacheFilePath = Application::s_ShaderCacheDirectory + "\\" + cacheFileName;
		const bool bUseCachedShaders =
			DirectoryUtil::FileExists(cacheFilePath)
			&& !IsCacheDirty(sourceFilePath, cacheFilePath);
		//---------------------------------------------------------------------------------
		if (!bPrinted)	// quick status print here
		{
			const char* pMsgLoad = bUseCachedShaders ? "Loading cached shader binaries" : "Compiling shader from source";
			Log::Info("\t%s %s...", pMsgLoad, mName.c_str());
			bPrinted = true;
		}
		//---------------------------------------------------------------------------------
		if (bUseCachedShaders)
		{
			blobs.of[stage] = CompileFromCachedBinary(cacheFilePath);
			if (!blobs.of[stage])
			{
				// Guard against a cache file that could not be read: the blob
				// is dereferenced immediately below.
				Log::Error("Failed to load cached shader binary: " + cacheFilePath);
				ReleaseBlobs();
				return false;
			}
		}
		else
		{
			std::string errMsg;
			ID3D10Blob* pBlob = nullptr;
			if (CompileFromSource(sourceFilePath, stage, pBlob, errMsg, stageDesc.macros))
			{
				blobs.of[stage] = pBlob;
				CacheShaderBinary(cacheFilePath, blobs.of[stage]);
			}
			else
			{
				Log::Error(errMsg);
				ReleaseBlobs();
				return false;
			}
		}

		CreateShaderStage(device, stage, blobs.of[stage]->GetBufferPointer(), blobs.of[stage]->GetBufferSize());
		SetReflections(blobs);
		//CheckSignatures();

		// Remember where the stage came from and when the source was last written.
		ShaderLoadDesc loadDesc = {};
		loadDesc.fullPath = sourceFilePath;
		loadDesc.lastWriteTime = std::experimental::filesystem::last_write_time(sourceFilePath);
		mDirectories[stage] = loadDesc;
	}

	// INPUT LAYOUT (VS)
	//---------------------------------------------------------------------------
	// src: https://stackoverflow.com/questions/42388979/directx-11-vertex-shader-reflection
	// setup the layout of the data that goes into the shader
	//
	if(mReflections.vsRefl)
	{
		D3D11_SHADER_DESC shaderDesc = {};
		mReflections.vsRefl->GetDesc(&shaderDesc);
		std::vector<D3D11_INPUT_ELEMENT_DESC> inputLayout(shaderDesc.InputParameters);

		for (unsigned i = 0; i < shaderDesc.InputParameters; ++i)
		{
			D3D11_SIGNATURE_PARAMETER_DESC paramDesc = {};
			mReflections.vsRefl->GetInputParameterDesc(i, &paramDesc);

			// fill out input element desc
			// Value-initialised so that Format is DXGI_FORMAT_UNKNOWN (0), not
			// an indeterminate value, when no case below matches.
			D3D11_INPUT_ELEMENT_DESC elementDesc = {};
			elementDesc.SemanticName = paramDesc.SemanticName;
			elementDesc.SemanticIndex = paramDesc.SemanticIndex;
			elementDesc.InputSlot = 0;
			elementDesc.AlignedByteOffset = D3D11_APPEND_ALIGNED_ELEMENT;
			elementDesc.InputSlotClass = D3D11_INPUT_PER_VERTEX_DATA;
			elementDesc.InstanceDataStepRate = 0;

			// determine DXGI format: the component mask selects the number of
			// 32-bit components, the component type selects UINT / SINT / FLOAT
			if (paramDesc.Mask == 1)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32_FLOAT;
			}
			else if (paramDesc.Mask <= 3)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32_FLOAT;
			}
			else if (paramDesc.Mask <= 7)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32_FLOAT;
			}
			else if (paramDesc.Mask <= 15)
			{
				if      (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_UINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32A32_UINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_SINT32)  elementDesc.Format = DXGI_FORMAT_R32G32B32A32_SINT;
				else if (paramDesc.ComponentType == D3D_REGISTER_COMPONENT_FLOAT32) elementDesc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
			}

			inputLayout[i] = elementDesc; //save element desc
		}

		// Try to create Input Layout (skipped when the shader has no inputs
		// and the vector therefore has no storage)
		const auto* pData = inputLayout.data();
		if (pData)
		{
			const HRESULT result = device->CreateInputLayout(
				pData,
				shaderDesc.InputParameters,
				blobs.vs->GetBufferPointer(),
				blobs.vs->GetBufferSize(),
				&mpInputLayout);

			if (FAILED(result))
			{
				OutputDebugString("Error creating input layout");
				ReleaseBlobs();
				return false;
			}
		}
	}

	// CONSTANT BUFFERS
	//---------------------------------------------------------------------------
	// Obtain cbuffer layout information
	for (EShaderStage type = EShaderStage::VS; type < EShaderStage::COUNT; type = static_cast<EShaderStage>(type + 1))
	{
		if (mReflections.of[type])
		{
			ReflectConstantBufferLayouts(mReflections.of[type], type);
		}
	}

	// Create CPU & GPU constant buffers
	// CPU CBuffers: one zero-filled CPUConstant per reflected variable, paired
	// with the index of the constant buffer layout it belongs to.
	int constantBufferSlot = 0;
	for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
	{
		for (const D3D11_SHADER_VARIABLE_DESC& varDesc : cbLayout.variables)
		{
			CPUConstant c;
			CPUConstantID c_id = static_cast<CPUConstantID>(mCPUConstantBuffers.size());

			c._name = varDesc.Name;
			c._size = varDesc.Size;
			// NOTE(review): raw allocation; it is not freed in this function,
			// presumably the owner releases it elsewhere -- confirm.
			c._data = new char[c._size];
			memset(c._data, 0, c._size);
			m_constants.push_back(std::make_pair(constantBufferSlot, c_id));
			mCPUConstantBuffers.push_back(c);
		}
		++constantBufferSlot;
	}

	// GPU CBuffers: one dynamic, CPU-writable buffer per reflected layout
	D3D11_BUFFER_DESC cBufferDesc = {};
	cBufferDesc.Usage = D3D11_USAGE_DYNAMIC;
	cBufferDesc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
	cBufferDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	cBufferDesc.MiscFlags = 0;
	cBufferDesc.StructureByteStride = 0;
	for (const ConstantBufferLayout& cbLayout : m_CBLayouts)
	{
		ConstantBufferBinding cBuffer;
		cBufferDesc.ByteWidth = cbLayout.desc.Size;
		if (FAILED(device->CreateBuffer(&cBufferDesc, NULL, &cBuffer.data)))
		{
			OutputDebugString("Error creating constant buffer");
			ReleaseBlobs();
			return false;
		}
		cBuffer.dirty = true;
		cBuffer.shaderStage = cbLayout.stage;
		cBuffer.bufferSlot = cbLayout.bufSlot;
		mConstantBuffers.push_back(cBuffer);
	}


	// TEXTURES & SAMPLERS
	//---------------------------------------------------------------------------
	// Slots are handed out per stage, in the order the resources are reflected.
	for (int shaderStage = 0; shaderStage < EShaderStage::COUNT; ++shaderStage)
	{
		unsigned texSlot = 0;	unsigned smpSlot = 0;
		unsigned uavSlot = 0;
		auto& sRefl = mReflections.of[shaderStage];
		if (sRefl)
		{
			// Named differently from the 'desc' parameter to avoid shadowing it.
			D3D11_SHADER_DESC stageReflDesc = {};
			sRefl->GetDesc(&stageReflDesc);

			for (unsigned i = 0; i < stageReflDesc.BoundResources; ++i)
			{
				D3D11_SHADER_INPUT_BIND_DESC shdInpDesc;
				sRefl->GetResourceBindingDesc(i, &shdInpDesc);

				switch (shdInpDesc.Type)
				{
					case D3D_SIT_SAMPLER:
					{
						SamplerBinding smp;
						smp.shaderStage = static_cast<EShaderStage>(shaderStage);
						smp.samplerSlot = smpSlot++;
						mSamplerBindings.push_back(smp);
						mShaderSamplerLookup[shdInpDesc.Name] = static_cast<int>(mSamplerBindings.size() - 1);
					} break;

					case D3D_SIT_TEXTURE:
					{
						TextureBinding tex;
						tex.shaderStage = static_cast<EShaderStage>(shaderStage);
						tex.textureSlot = texSlot++;
						mTextureBindings.push_back(tex);
						mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
					} break;

					// UAVs share the texture binding table but use their own slot counter.
					case D3D_SIT_UAV_RWTYPED:
					{
						TextureBinding tex;
						tex.shaderStage = static_cast<EShaderStage>(shaderStage);
						tex.textureSlot = uavSlot++;
						mTextureBindings.push_back(tex);
						mShaderTextureLookup[shdInpDesc.Name] = static_cast<int>(mTextureBindings.size() - 1);
					} break;

					// constant buffers were handled above
					case D3D_SIT_CBUFFER: break;


					default:
						Log::Warning("Unhandled shader input bind type in shader reflection");
						break;

				} // switch shader input type
			} // bound resource
		} // sRefl
	} // shaderStage

	// release blobs
	ReleaseBlobs();

	return true;
}
예제 #15
0
파일: main.cpp 프로젝트: ajturner/lerc
// Sample / smoke-test driver for the Lerc codec.
//   Sample 1: 512 x 512 float image, 1 band, some pixels masked invalid,
//             encoded with maxZError just under 0.1.
//   Sample 2: 257 x 713 byte image, 3 bands, all pixels valid, maxZError = 0.
// Each sample is encoded, decoded again and compared against the original;
// compression ratio, timings and the largest per-pixel difference are printed.
// When TestLegacyData is defined, a fixed list of Lerc blobs is also read from
// disk and decoded.  Failures are reported on stdout; the program always
// waits for ENTER and returns 0.
int _tmain(int argc, _TCHAR* argv[])
{
  // Sample 1: float image, 1 band, with some pixels set to invalid / void, maxZError = 0.1

  int h = 512;
  int w = 512;

  float* zImg = new float[w * h];
  memset(zImg, 0, w * h * sizeof(float));

  LercNS::BitMask bitMask(w, h);
  bitMask.SetAllValid();

  for (int k = 0, i = 0; i < h; i++)
  {
    for (int j = 0; j < w; j++, k++)
    {
      zImg[k] = sqrt((float)(i * i + j * j));    // smooth surface
      zImg[k] += rand() % 20;    // add some small amplitude noise

      if (j % 100 == 0 || i % 100 == 0)    // set some void points
        bitMask.SetInvalid(k);
    }
  }


  // compress into byte arr

  double maxZErrorWanted = 0.1;
  double eps = 0.0001;    // safety margin (optional), to account for finite floating point accuracy
  double maxZError = maxZErrorWanted - eps;

  size_t numBytesNeeded = 0;
  size_t numBytesWritten = 0;
  Lerc lerc;
  PerfTimer pt;

  if (!lerc.ComputeBufferSize((void*)zImg,    // raw image data, row by row, band by band
    Lerc::DT_Float,
    w, h, 1,
    &bitMask,                  // set 0 if all pixels are valid
    maxZError,                 // max coding error per pixel, or precision
    numBytesNeeded))           // size of outgoing Lerc blob
  {
    cout << "ComputeBufferSize failed" << endl;
  }

  size_t numBytesBlob = numBytesNeeded;
  Byte* pLercBlob = new Byte[numBytesBlob];

  pt.start();

  if (!lerc.Encode((void*)zImg,    // raw image data, row by row, band by band
    Lerc::DT_Float,
    w, h, 1,
    &bitMask,           // 0 if all pixels are valid
    maxZError,           // max coding error per pixel, or precision
    pLercBlob,           // buffer to write to, function will fail if buffer too small
    numBytesBlob,        // buffer size
    numBytesWritten))    // num bytes written to buffer
  {
    cout << "Encode failed" << endl;
  }

  pt.stop();

  // uncompressed size counts 1 bit (0.125 byte) of mask plus one float per pixel
  double ratio = w * h * (0.125 + sizeof(float)) / numBytesBlob;
  cout << "sample 1 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

  // new data storage
  float* zImg3 = new float[w * h];
  memset(zImg3, 0, w * h * sizeof(float));

  BitMask bitMask3(w, h);
  bitMask3.SetAllValid();


  // decompress

  Lerc::LercInfo lercInfo;
  if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
    cout << "get header info failed" << endl;

  if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 1 || lercInfo.dt != Lerc::DT_Float)
    cout << "got wrong lerc info" << endl;

  pt.start();

  if (!lerc.Decode(pLercBlob, numBytesBlob, &bitMask3, w, h, 1, Lerc::DT_Float, (void*)zImg3))
    cout << "decode failed" << endl;

  pt.stop();


  // compare to orig: the masks must agree, and valid pixels are compared by value

  double maxDelta = 0;
  for (int k = 0, i = 0; i < h; i++)
  {
    for (int j = 0; j < w; j++, k++)
    {
      if (bitMask3.IsValid(k) != bitMask.IsValid(k))
        cout << "Error in main: decoded bit mask differs from encoded bit mask" << endl;

      if (bitMask3.IsValid(k))
      {
        double delta = fabs(zImg3[k] - zImg[k]);
        if (delta > maxDelta)
          maxDelta = delta;
      }
    }
  }

  cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

  delete[] zImg;
  delete[] zImg3;
  delete[] pLercBlob;
  pLercBlob = 0;


  // Sample 2: random byte image, 3 bands, all pixels valid, maxZError = 0 (lossless)

  h = 713;
  w = 257;

  Byte* byteImg = new Byte[w * h * 3];
  memset(byteImg, 0, w * h * 3);

  for (int iBand = 0; iBand < 3; iBand++)
  {
    Byte* arr = byteImg + iBand * w * h;
    for (int k = 0, i = 0; i < h; i++)
      for (int j = 0; j < w; j++, k++)
        arr[k] = rand() % 30;
  }

  // encode

  if (!lerc.ComputeBufferSize((void*)byteImg, Lerc::DT_Byte, w, h, 3, 0, 0, numBytesNeeded))
    cout << "ComputeBufferSize failed" << endl;

  numBytesBlob = numBytesNeeded;
  pLercBlob = new Byte[numBytesBlob];

  pt.start();

  if (!lerc.Encode((void*)byteImg,    // raw image data, row by row, band by band
    Lerc::DT_Byte,
    w, h, 3,
    0,                   // 0 if all pixels are valid
    0,                   // max coding error per pixel, or precision
    pLercBlob,           // buffer to write to, function will fail if buffer too small
    numBytesBlob,        // buffer size
    numBytesWritten))    // num bytes written to buffer
  {
    cout << "Encode failed" << endl;
  }

  pt.stop();

  ratio = w * h * 3 / (double)numBytesBlob;
  cout << "sample 2 compression ratio = " << ratio << ", encode time = " << pt.ms() << " ms" << endl;

  // new data storage
  Byte* byteImg3 = new Byte[w * h * 3];
  memset(byteImg3, 0, w * h * 3);

  // decompress

  if (!lerc.GetLercInfo(pLercBlob, numBytesBlob, lercInfo))
    cout << "get header info failed" << endl;

  if (lercInfo.nCols != w || lercInfo.nRows != h || lercInfo.nBands != 3 || lercInfo.dt != Lerc::DT_Byte)
    cout << "got wrong lerc info" << endl;

  pt.start();

  if (!lerc.Decode(pLercBlob, numBytesBlob, 0, w, h, 3, Lerc::DT_Byte, (void*)byteImg3))
    cout << "decode failed" << endl;

  pt.stop();

  // compare to orig: walk all 3 bands, not just the first w * h bytes,
  // so an error in band 1 or 2 is not missed

  maxDelta = 0;
  for (int k = 0; k < w * h * 3; k++)
  {
    double delta = abs(byteImg3[k] - byteImg[k]);
    if (delta > maxDelta)
      maxDelta = delta;
  }

  cout << "max z error per pixel = " << maxDelta << ", decode time = " << pt.ms() << " ms" << endl;

  delete[] byteImg;
  delete[] byteImg3;
  delete[] pLercBlob;
  pLercBlob = 0;


#ifdef TestLegacyData

  // Fixed-size scratch buffers shared by all legacy files.
  const size_t legacyBufSize = 4 * 2048 * 2048;
  Byte* pLercBuffer = new Byte[legacyBufSize];
  Byte* pDstArr     = new Byte[legacyBufSize];

  const string path = "D:/GitHub/LercOpenSource/testData/";

  static const char* const legacyFiles[] =
  {
    "amazon3.lerc1",
    "tuna.lerc1",
    "tuna_0_to_1_w1920_h925.lerc1",

    "testbytes.lerc2",
    "testHuffman_w30_h20_uchar0.lerc2",
    "testHuffman_w30_h20_ucharx.lerc2",
    "testHuffman_w1922_h1083_uchar.lerc2",

    "testall_w30_h20_char.lerc2",
    "testall_w30_h20_byte.lerc2",
    "testall_w30_h20_short.lerc2",
    "testall_w30_h20_ushort.lerc2",
    "testall_w30_h20_long.lerc2",
    "testall_w30_h20_ulong.lerc2",
    "testall_w30_h20_float.lerc2",

    "testall_w1922_h1083_char.lerc2",
    "testall_w1922_h1083_byte.lerc2",
    "testall_w1922_h1083_short.lerc2",
    "testall_w1922_h1083_ushort.lerc2",
    "testall_w1922_h1083_long.lerc2",
    "testall_w1922_h1083_ulong.lerc2",
    "testall_w1922_h1083_float.lerc2",

    "testuv_w30_h20_char.lerc2",
    "testuv_w30_h20_byte.lerc2",
    "testuv_w30_h20_short.lerc2",
    "testuv_w30_h20_ushort.lerc2",
    "testuv_w30_h20_long.lerc2",
    "testuv_w30_h20_ulong.lerc2",
    "testuv_w30_h20_float.lerc2",

    "testuv_w1922_h1083_char.lerc2",
    "testuv_w1922_h1083_byte.lerc2",
    "testuv_w1922_h1083_short.lerc2",
    "testuv_w1922_h1083_ushort.lerc2",
    "testuv_w1922_h1083_long.lerc2",
    "testuv_w1922_h1083_ulong.lerc2",
    "testuv_w1922_h1083_float.lerc2"
  };
  const size_t numLegacyFiles = sizeof(legacyFiles) / sizeof(legacyFiles[0]);

  for (size_t n = 0; n < numLegacyFiles; n++)
  {
    string fn = path;
    fn += legacyFiles[n];

    // A missing file must not crash the run: skip it with a message.
    FILE* fp = 0;
    if (fopen_s(&fp, fn.c_str(), "rb") != 0 || !fp)
    {
      cout << "cannot open " << fn << endl;
      continue;
    }

    fseek(fp, 0, SEEK_END);
    long fileLen = ftell(fp);    // get the file size
    fseek(fp, 0, SEEK_SET);

    // Refuse blobs that are empty or would overrun the fixed-size read buffer.
    if (fileLen <= 0 || (size_t)fileLen > legacyBufSize)
    {
      cout << "unexpected file size for " << fn << endl;
      fclose(fp);
      continue;
    }

    size_t fileSize = (size_t)fileLen;
    size_t numRead = fread(pLercBuffer, 1, fileSize, fp);    // read Lerc blob into buffer
    fclose(fp);
    fp = 0;

    if (numRead != fileSize)
    {
      cout << "cannot read " << fn << endl;
      continue;
    }

    if (!lerc.GetLercInfo(pLercBuffer, fileSize, lercInfo))
      cout << "get header info failed" << endl;
    else
    {
      int cols = lercInfo.nCols;
      int rows = lercInfo.nRows;
      int nBands = lercInfo.nBands;
      Lerc::DataType dt = lercInfo.dt;

      pt.start();

      // NOTE(review): pDstArr holds legacyBufSize bytes; this assumes every
      // decoded image (cols * rows * nBands * element size) fits -- confirm.
      std::string resultMsg = "ok";
      BitMask bitMask;
      if (!lerc.Decode(pLercBuffer, fileSize, &bitMask, cols, rows, nBands, dt, (void*)pDstArr))
        resultMsg = "FAILED";

      pt.stop();
      printf("w = %4d, h = %4d, nBands = %2d, dt = %2d, time = %4d ms,  %s :  %s\n", cols, rows, nBands, (int)dt, pt.ms(), resultMsg.c_str(), legacyFiles[n]);
    }
  }

  delete[] pLercBuffer;
  delete[] pDstArr;

#endif

  printf("\npress ENTER\n");
  getchar();

  return 0;
}
예제 #16
0
// Entry point of the MPI image-deconvolution driver.
//
// With BENCHMARKING defined it only runs benchmark(argc, argv).  Otherwise it
// initialises MPI and splits by rank: rank 0 acts as the controller that hands
// images out and saves the returned ones, every other rank acts as a worker
// that filters whatever it receives.  All image traffic goes through the one
// static 'buffer' using the IMG tag; the END tag tells a worker to stop.
//
// argv[1], when present, gives the iteration count (parsed with atoi);
// otherwise NITER is used.  Always returns 0.
int main(int argc, char* argv[]) {
#ifdef BENCHMARKING
    benchmark(argc, argv);
#else
    // mpi setup
    int numProcs;
    int rank, flag;
    int done = 0;   // payload of the END message; its value is not inspected here
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // create a buffer for both worker and controller
    static double buffer[BUFFER_SIZE];
    unsigned int niter = argc > 1 ? atoi(argv[1]) : NITER;

    // Setting up the PSF (statically)
    int psfWidth, psfHeight;
    double* psf = ImageQueue::getPsf(&psfWidth, &psfHeight);

    // ---------- CONTROLLER NODE ---------- //
    if (rank == 0) {
        // Set up producer
        ImageQueue images(buffer, BUFFER_SIZE, "../images", numProcs);

        // Print out some details
        int numImages = images.remaining();
        FPRINT("Starting %d iteration(s) on %d image(s)", niter, numImages);
        PerfTimer mainTimer;
        mainTimer.begin();

        // Initial dispatch: one image per rank, or fewer if the queue is shorter.
        // NOTE(review): the loop starts at rank 0, so the controller also sends
        // an image to itself -- confirm this is intended.
        int toSend = (unsigned int)numProcs < images.remaining() ? numProcs : images.remaining();
        for (int i = 0; i < toSend; i++) {
            images.pop(i);
            MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD);
        }

        // Poll every rank in turn until the queue reports nothing remaining.
        // NOTE(review): once remaining() reaches 0 the loop exits without
        // receiving results that may still be in flight -- verify that no
        // processed image is lost.
        while (images.remaining() > 0) {
            for (int i = 0; i < numProcs; i++) {

                // If an image is received then save it and send the next one
                MPI_Iprobe(i, IMG, MPI_COMM_WORLD, &flag, &status);
                if (flag) {
                    MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD, &status);
                    images.save(i);
                    images.pop(i);
                    MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, i, IMG, MPI_COMM_WORLD);
                }
            }
        }

        // Tell every rank (including rank 0 itself) that the run is over.
        for (int i = 0; i < numProcs; i++) {
            MPI_Send(&done, 1, MPI_INT, i, END, MPI_COMM_WORLD);
        }
        FPRINT("Finished %d image(s) in %f seconds", numImages, mainTimer.getElapsed());
    }

    // ---------- WORKER NODE ---------- //
    else { // worker thread
        // Set up consumer
        DeconvFilter filter(WIDTH, HEIGHT, niter, psf, psfWidth, psfHeight, buffer);
        bool running = true;
        PRINT("Worker thread initialised.");

        // Busy-poll rank 0 for either a new image or the END message.
        while (running) {
            MPI_Iprobe(0, IMG, MPI_COMM_WORLD, &flag, &status);
            if (flag) { // New image
                // The filter was constructed with 'buffer'; presumably it
                // processes the image in place before it is sent back.
                MPI_Recv(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD, &status);
                filter.process();
                MPI_Send(buffer, BUFFER_SIZE, MPI_DOUBLE, 0, IMG, MPI_COMM_WORLD);
            }

            MPI_Iprobe(0, END, MPI_COMM_WORLD, &flag, &status);
            if (flag) { // Execution finished
                MPI_Recv(&done, 1, MPI_INT, 0, END, MPI_COMM_WORLD, &status);
                running = false;
            }
        }
        PRINT("Worker thread finished.");
    }

    MPI_Finalize();
#endif
    return 0;
}