Ejemplos de parallel_reduce en C++ (Cpp)

Ejemplo n.º 1

0

Mostrar archivo

Archivo: parallel_reduce.c Proyecto: alexschlueter/mgpp

/* Runs the sequential and the parallel reduction over the same ten-element
 * array with the `max` and `sum` operators and prints both pairs of results. */
int main() {
  int values[] = {1,2,3,4,5,6,7,8,9,10};
  const int count = 10;

  /* sequential results */
  int seq_max = reduce(max, values, count);
  int seq_sum = reduce(sum, values, count);
  printf("max : %i; sum: %i\n", seq_max, seq_sum);

  /* the same two reductions through parallel_reduce */
  int par_max = parallel_reduce(max, values, count);
  int par_sum = parallel_reduce(sum, values, count);
  printf("parallel max : %i; parallel sum: %i\n", par_max, par_sum);

  return 0;
}

Ejemplo n.º 2

0

Mostrar archivo

Archivo: PerfTestBlasKernels.hpp Proyecto: ArchRobison/kokkos

void dot( const ConstVectorType & X ,
          const Finalize & finalize )
{
  typedef DotSingle< ConstVectorType >  functor ;

  parallel_reduce( X.dimension_0() , functor( X ) , finalize );
}

Ejemplo n.º 3

0

Mostrar archivo

Archivo: PerfTestBlasKernels.hpp Proyecto: Pakketeretet2/lammps

void dot( const ConstVectorType & X ,
          const Finalize & finalize )
{
  typedef DotSingle< ConstVectorType >  functor ;

  parallel_reduce( X.extent(0) , functor( X ) , finalize );
}

Ejemplo n.º 4

0

Mostrar archivo

Archivo: main.cpp Proyecto: ellcom/tbb

double parallelPearsonCorrelationCoefficient(double *a, double *b) {
    
    double meanA = parallelMean(a);
    double meanB = parallelMean(b);
    
    double standardDeviationA = parallelStandardDeviation(a,&meanA);
    double standardDeviationB = parallelStandardDeviation(b,&meanB);
    // TOP of fraction
    double value = parallel_reduce(blocked_range<size_t>(0, vec_size, 10000), double(0),
                                   [=](blocked_range<size_t> &r, double sum) -> double {
                                       for(size_t i=r.begin();i!=r.end();++i){
                                           sum += ((a[i]-meanA) * (b[i]-meanB));
                                       }
                                       return sum;
                                   },
                                   [=](double a, double b){
                                       return a+b;
                                   });
    
    value *= 1.0/vec_size;
    // BOTTOM of fraction
	value /= (standardDeviationA*standardDeviationB);
    
    return value;
}

Ejemplo n.º 5

0

Mostrar archivo

Archivo: life.cpp Proyecto: ptkila/cowichan

// Runs LIFE_ITERATIONS steps of the GameOfLife body over an nr x nc grid.
// Each step is one parallel_reduce over the full 2D range followed by a
// buffer swap; no_cells_alive() is called whenever the body reports that
// nothing is alive.
void CowichanTBB::life(BoolMatrix input, BoolMatrix output) {
  GameOfLife game(input, output, nr, nc);

  index_t generation = 0;
  while (generation < LIFE_ITERATIONS) {
    // advance the simulation by one step
    parallel_reduce(Range2D(0, nr, 0, nc), game, auto_partitioner());

    // react when the step left no live cells
    if (!game.isAlive()) {
      no_cells_alive();
    }

    // ping-pong the two arrays for the next step
    game.swap();
    ++generation;
  }

  // with an odd iteration count nothing has to be copied
  if (LIFE_ITERATIONS % 2 != 0) {
    return;
  }

  // final result is in input - copy to output
  for (index_t row = 0; row < nr; row++) {
    for (index_t col = 0; col < nc; col++) {
      MATRIX_RECT(output, row, col) = MATRIX_RECT(input, row, col);
    }
  }
}

Ejemplo n.º 6

0

Mostrar archivo

Archivo: SPHSolver.cpp Proyecto: sumitneup/KorraFluidSolver

// Computes the density of `particle` and stores it with SetDensity().
//
// The density is the sum of KernelPoly6(distance, m_kernelRadius) over all
// `neighborsCount` entries of particle->neighbors, multiplied by
// FluidParticle::mass. With USE_TBB defined the sum is done by
// parallel_reduce over the neighbor pointer range; otherwise by a plain loop.
void
SPHSolver::CalculateDensity(
    FluidParticle* particle
    )
{
    // -- Compute density over a kernel function of neighbors

#ifdef USE_TBB
    float density = parallel_reduce(
        blocked_range<FluidParticle**>( particle->neighbors, particle->neighbors + particle->neighborsCount ),
        0.f,
        // partial sum of the kernel over one sub-range of neighbors
        [&](const blocked_range<FluidParticle**>& range, float init)->float {
            for( FluidParticle** neighbor = range.begin(); neighbor != range.end(); ++neighbor)
            {
                init += KernelPoly6(glm::distance((*neighbor)->Position(), particle->Position()), m_kernelRadius);
            }
            return init;
        },
        // combine two partial sums
        []( float x, float y )->float {
            return x + y;
        }
    );
#else
    float density = 0.0f;
    for (size_t i = 0; i < particle->neighborsCount; ++i) {
        FluidParticle* neighbor = particle->neighbors[i];
        float tempDensity = KernelPoly6(glm::distance(neighbor->Position(), particle->Position()), m_kernelRadius);
        density += tempDensity;
    }
#endif
    // (the directive above previously read "#endif;" - the stray token after
    // a preprocessor directive is non-conforming and triggers a diagnostic)
    density = FluidParticle::mass * density;
    particle->SetDensity(density);
}

Ejemplo n.º 7

0

Mostrar archivo

Archivo: SceneCacheThreadingTest.cpp Proyecto: ainaerco/cortex

	void testFakeAttributeRead()
	{
		task_scheduler_init scheduler( 100 );
		
		TestSceneCache task( "fake" );
		
		parallel_reduce( blocked_range<size_t>( 0, 100 ), task );
 		BOOST_CHECK( task.errors() == 100000 );
	}

Ejemplo n.º 8

0

Mostrar archivo

 /// Reduces a Dot functor whose `x` member is set to the given vector over
 /// `n` work items and returns the value written by parallel_reduce.
 inline static
 value_type apply( const size_t n ,
                   const scalar_vector & x )
 {
   Dot functor ;
   functor.x = x ;

   value_type total = 0 ;
   parallel_reduce( n , functor , total );
   return total ;
 }

Ejemplo n.º 9

0

Mostrar archivo

Archivo: Threading.cpp Proyecto: amitamitamitamit/spring

	// Chooses the number of ThreadPool workers and assigns CPU affinities.
	//
	// Steps, as visible in this function:
	//  1. Build the main-thread affinity mask from the available-cores mask,
	//     restricted by the "SetCoreAffinity" config value (non-UNIT_TEST only).
	//  2. Pick a worker count (config value or a heuristic on the core count),
	//     clamp it to the core count and apply it with SetThreadCount().
	//  3. Let each worker pick a core from the mask not used by the main
	//     thread and OR the chosen cores together via parallel_reduce.
	//  4. Set the main thread affinity to its mask minus the worker cores.
	void InitThreadPool() {
		boost::uint32_t systemCores   = Threading::GetAvailableCoresMask();
		boost::uint32_t mainAffinity  = systemCores;
#ifndef UNIT_TEST
		mainAffinity &= configHandler->GetUnsigned("SetCoreAffinity");
#endif
		// cores that are available but not claimed by the main thread
		boost::uint32_t ompAvailCores = systemCores & ~mainAffinity;

		{
#ifndef UNIT_TEST
			int workerCount = std::min(ThreadPool::GetMaxThreads() - 1, configHandler->GetUnsigned("WorkerThreadCount"));
			ThreadPool::SetThreadSpinTime(configHandler->GetUnsigned("WorkerThreadSpinTime"));
#else
			// negative value selects the core-count heuristic below
			int workerCount = -1;
#endif
			const int numCores = ThreadPool::GetMaxThreads();

			// For latency reasons our worker threads yield rarely and so eat a lot cputime with idleing.
			// So it's better we always leave 1 core free for our other threads, drivers & OS
			if (workerCount < 0) {
				// heuristic: 2 cores -> 2 workers, fewer than 6 -> all but one, otherwise half
				if (numCores == 2) {
					workerCount = numCores;
				} else if (numCores < 6) {
					workerCount = numCores - 1;
				} else {
					workerCount = numCores / 2;
				}
			}
			// never run more workers than GetMaxThreads() reports
			if (workerCount > numCores) {
				LOG_L(L_WARNING, "Set ThreadPool workers to %i, but there are just %i cores!", workerCount, numCores);
				workerCount = numCores;
			}

			ThreadPool::SetThreadCount(workerCount);
		}

		// set affinity of worker threads
		// NOTE(review): this parallel_reduce overload is project-specific; presumably
		// the first lambda runs once per pool thread and the second folds the
		// per-thread futures into one mask - confirm against its declaration.
		boost::uint32_t ompCores = 0;
		ompCores = parallel_reduce([&]() -> boost::uint32_t {
			const int i = ThreadPool::GetThreadNum();

			// 0 is the source thread, skip
			if (i == 0)
				return 0;

			boost::uint32_t ompCore = GetCpuCoreForWorkerThread(i - 1, ompAvailCores, mainAffinity);
			//boost::uint32_t ompCore = ompAvailCores;
			Threading::SetAffinity(ompCore);
			return ompCore;
		}, [](boost::uint32_t a, boost::unique_future<boost::uint32_t>& b) -> boost::uint32_t { return a | b.get(); });

		// affinity of mainthread
		boost::uint32_t nonOmpCores = ~ompCores;
		// an empty main mask falls back to every available core
		if (mainAffinity == 0) mainAffinity = systemCores;
		Threading::SetAffinityHelper("Main", mainAffinity & nonOmpCores);
	}

Ejemplo n.º 10

0

Mostrar archivo

Archivo: SPHSolver.cpp Proyecto: sumitneup/KorraFluidSolver

// Computes the pressure force on `particle` and stores it with
// SetPressureForce().
//
// For every neighbor the spiky-kernel gradient is scaled by
// (P_self / rho_self^2 + P_neighbor / rho_neighbor^2) and accumulated; the
// negated sum is then multiplied by FluidParticle::mass twice. With USE_TBB
// defined the accumulation runs through parallel_reduce, otherwise in a loop.
void
SPHSolver::CalculatePressureForceField(
    FluidParticle* particle
    )
{
    glm::vec3 gradient(0.0f);
    float selfDensitySq = particle->Density() * particle->Density();

#ifdef USE_TBB
    FluidParticle** const first = particle->neighbors;
    FluidParticle** const last = particle->neighbors + particle->neighborsCount;

    gradient = parallel_reduce(
        blocked_range<FluidParticle**>( first, last ),
        glm::vec3(0.0),
        // accumulate the weighted kernel gradients of one neighbor sub-range
        [&](const blocked_range<FluidParticle**>& chunk, glm::vec3 partial)->glm::vec3 {
            for( FluidParticle** it = chunk.begin(); it != chunk.end(); ++it )
            {
                FluidParticle* other = *it;

                glm::vec3 offset = particle->Position() - other->Position();
                float dist = glm::distance(other->Position(), particle->Position());
                glm::vec3 spikyGrad = GradKernelSpiky(offset, dist, m_kernelRadius);

                float otherDensitySq = other->Density() * other->Density();
                float scale =
                    (particle->Pressure() / selfDensitySq) +
                    (other->Pressure() / otherDensitySq);
                partial += scale * spikyGrad;
            }
            return partial;
        },
        // merge two partial gradients
        []( glm::vec3 lhs, glm::vec3 rhs )->glm::vec3 {
            return lhs + rhs;
        }
    );
#else
    for (size_t idx = 0; idx < particle->neighborsCount; ++idx) {
        FluidParticle* other = particle->neighbors[idx];

        glm::vec3 offset = particle->Position() - other->Position();
        float dist = glm::distance(other->Position(), particle->Position());
        glm::vec3 spikyGrad = GradKernelSpiky(offset, dist, m_kernelRadius);

        float otherDensitySq = other->Density() * other->Density();
        float scale =
            particle->Pressure() / selfDensitySq +
            other->Pressure() / otherDensitySq;
        gradient += scale * spikyGrad;
    }
#endif

    gradient = -gradient * FluidParticle::mass * FluidParticle::mass;
    particle->SetPressureForce(gradient);
}

Ejemplo n.º 11

0

Mostrar archivo

Archivo: main.cpp Proyecto: ellcom/tbb

/// Computes the standard deviation of the vec_size doubles starting at `a`.
///
/// @param a         first element of the array; [a, a + vec_size) is read.
/// @param meanValue pointer to the precomputed mean of that array.
/// @return sqrt( sum((a[i] - mean)^2) / vec_size ).
///
/// The sum of squared deviations is built with the lambda form of
/// parallel_reduce: the first lambda folds one sub-range into a running sum,
/// the second adds two partial sums.
double parallelStandardDeviation(double *a, double *meanValue) {
    // Read the mean once instead of dereferencing the pointer per element.
    const double mean = *meanValue;
    return sqrt(parallel_reduce(blocked_range<double*>(a, a+vec_size),0.0,
                           [&](const blocked_range<double*>& r, double sum)->double {
                               for(double* i=r.begin(); i!=r.end(); ++i) {
                                   // plain multiply instead of pow(x, 2.0): squaring
                                   // needs no general power function in the hot loop
                                   const double deviation = *i - mean;
                                   sum += deviation * deviation;
                               }
                               return sum;
                           }, [](double x, double y)->double {return x+y;}
    )/vec_size);
}

Ejemplo n.º 12

0

Mostrar archivo

Archivo: graphbuilder.cpp Proyecto: crepererum/GraBaSS

void buildGraph(std::vector<std::pair<datadim_t, mdMap_t>> data, std::vector<std::pair<discretedim_t, discretedim_t>> dataDiscrete, std::shared_ptr<gc::Graph> graph, data_t threshold) {
	std::cout << "Build initial graph: " << std::flush;

	long edgeCount = 0;
	data_t xMax = std::numeric_limits<data_t>::lowest();

	for (std::size_t i = 0; i < data.size(); i++) {
		std::list<std::size_t> refs;

		// reverse search existing vertices
		TBBSearchHelper helper1(i, graph);
		parallel_reduce(tbb::blocked_range<std::size_t>(0, i), helper1);
		refs.splice(refs.end(), helper1.refs);

		// do not add self reference (=i)

		// test edges too non exisiting vertices
		TBBEdgeHelper helper2(data[i].first, data[i].second, dataDiscrete[i].first, dataDiscrete[i].second, &data, &dataDiscrete, threshold);
		parallel_reduce(tbb::blocked_range<std::size_t>(i + 1, data.size()), helper2);
		xMax = std::max(xMax, helper2.xMax);
		refs.splice(refs.end(), helper2.refs);

		// store
		graph->add(refs);
		edgeCount += refs.size();

		// report progress
		if (i % 100 == 0) {
			std::cout << i << std::flush;
		} else if (i % 10 == 0) {
			std::cout << "." << std::flush;
		}
	}

	std::cout << "done (" << (edgeCount / 2) << " edges, max="<< xMax << ")" << std::endl;
}

Ejemplo n.º 13

0

Mostrar archivo

Archivo: parallel_reduce.cpp Proyecto: JianpingCAI/libigl

    /// Regression test for parallel_reduce: for a growing sequence of sizes N
    /// the sum of squares 0^2 + ... + (N-1)^2 is computed sequentially and
    /// then M times through parallel_reduce (block size 1024); every parallel
    /// result must equal the sequential one.
    ///
    /// Prints the test name, a throughput figure per N and a final
    /// "[passed]" / "[failed]" marker.
    ///
    /// @return true only if every comparison succeeded.
    bool operator() ()
    {
      bool passed = true;
      printf("%s::%s ... ",TOSTRING(isa),name);
      fflush(stdout);

      const size_t M = 10;
      for (size_t N=10; N<10000000; N*=2.1f)
      {
        /* sequentially calculate sum of squares */
        size_t sum0 = 0;
        for (size_t i=0; i<N; i++) {
          sum0 += i*i;
        }

        /* parallel calculation of sum of squares */
        double t0 = getSeconds();
        for (size_t m=0; m<M; m++)
        {
          size_t sum1 = parallel_reduce( size_t(0), size_t(N), size_t(1024), size_t(0), [&](const range<size_t>& r) -> size_t
          {
            size_t s = 0;
            for (size_t i=r.begin(); i<r.end(); i++) 
              s += i*i;
            return s;
          }, 
          [](const size_t v0, const size_t v1) {
            return v0+v1;
          });

          /* latch failures: a plain assignment here would let a later
             matching run hide an earlier mismatch */
          if (sum0 != sum1)
            passed = false;
        }
        double t1 = getSeconds();
        printf("%zu/%3.2fM ",N,1E-6*double(N*M)/(t1-t0));
      }
      
      /* output if test passed or not */
      if (passed) printf("[passed]\n");
      else        printf("[failed]\n");
      
      return passed;
    }

Ejemplo n.º 14

0

Mostrar archivo

Archivo: Threading.cpp Proyecto: 9heart/spring

	// Chooses the number of ThreadPool workers and assigns CPU affinities.
	//
	// Steps, as visible in this function:
	//  1. Build the main-thread affinity mask from the available-cores mask,
	//     restricted by the "SetCoreAffinity" config value (non-UNIT_TEST only).
	//  2. Apply the worker count (config value, or GetMaxThreads() - 1 when
	//     the count is negative) with SetThreadCount().
	//  3. Let each worker pick a core from the mask not used by the main
	//     thread and OR the chosen cores together via parallel_reduce.
	//  4. Set the main thread affinity to its mask minus the worker cores.
	void InitThreadPool() {
		boost::uint32_t systemCores   = Threading::GetAvailableCoresMask();
		boost::uint32_t mainAffinity  = systemCores;
#ifndef UNIT_TEST
		mainAffinity = systemCores & configHandler->GetUnsigned("SetCoreAffinity");
#endif
		// Must be derived AFTER the config mask is applied: computed while
		// mainAffinity still equals systemCores it is always 0, which leaves
		// the workers with an empty set of available cores.
		boost::uint32_t ompAvailCores = systemCores & ~mainAffinity;

		{
			int workerCount = -1;
#ifndef UNIT_TEST
			workerCount = configHandler->GetUnsigned("WorkerThreadCount");
			ThreadPool::SetThreadSpinTime(configHandler->GetUnsigned("WorkerThreadSpinTime"));
#endif
			// For latency reasons our worker threads yield rarely and so eat a lot cputime with idleing.
			// So it's better we always leave 1 core free for our other threads, drivers & OS
			if (workerCount < 0) workerCount = ThreadPool::GetMaxThreads() - 1;
			//if (workerCount > ThreadPool::GetMaxThreads()) LOG_L(L_WARNING, "");

			ThreadPool::SetThreadCount(workerCount);
		}

		// set affinity of worker threads
		// NOTE(review): this parallel_reduce overload is project-specific; presumably
		// the first lambda runs once per pool thread and the second folds the
		// per-thread futures into one mask - confirm against its declaration.
		boost::uint32_t ompCores = 0;
		ompCores = parallel_reduce([&]() -> boost::uint32_t {
			const int i = ThreadPool::GetThreadNum();
			if (i != 0) {
				// 0 is the source thread, skip
				boost::uint32_t ompCore = GetCpuCoreForWorkerThread(i - 1, ompAvailCores, mainAffinity);
				Threading::SetAffinity(ompCore);
				return ompCore;
			}
			return 0;
		}, [](boost::uint32_t a, boost::unique_future<boost::uint32_t>& b) -> boost::uint32_t { return a | b.get(); });

		// affinity of mainthread
		boost::uint32_t nonOmpCores = ~ompCores;
		// an empty main mask falls back to every available core
		if (mainAffinity == 0) mainAffinity = systemCores;
		Threading::SetAffinityHelper("Main", mainAffinity & nonOmpCores);
	}

Ejemplo n.º 15

0

Mostrar archivo

Archivo: norm.cpp Proyecto: ptkila/cowichan

// Normalizes the n points of pointsIn into pointsOut.
//
// A MinMaxReducer is first reduced over [0, n) to obtain the minimum and
// maximum point. Per axis the scaling factor is 1 / (max - min), or 0 when
// max equals min. A Normalizer built from the minimum and the two factors is
// then applied over [0, n) with parallel_for.
void CowichanTBB::norm(PointVector pointsIn, PointVector pointsOut) {

  // pass 1: reduce over all points to find the extreme coordinates
  MinMaxReducer extremes(pointsIn);
  parallel_reduce(Range(0, n), extremes, auto_partitioner());

  Point lo = extremes.getMinimum();
  Point hi = extremes.getMaximum();

  // per-axis scale; a degenerate axis (max == min) gets factor 0
  real xfactor;
  if (hi.x == lo.x) {
    xfactor = (real)0.0;
  } else {
    xfactor = (real)(1.0 / (hi.x - lo.x));
  }

  real yfactor;
  if (hi.y == lo.y) {
    yfactor = (real)0.0;
  } else {
    yfactor = (real)(1.0 / (hi.y - lo.y));
  }

  // pass 2: apply the normalizer to every point
  Normalizer scaler(pointsIn, pointsOut, lo.x, lo.y, xfactor, yfactor);
  parallel_for(Range(0, n), scaler, auto_partitioner());

}

Ejemplo n.º 16

0

Mostrar archivo

typename XVector::scalar_type V_Dot( const XVector & x, const YVector & y)
{
  V_DotFunctor<XVector,YVector> f(x,y);
  return parallel_reduce(x.dimension_0(),f);
}

Ejemplo n.º 17

0

Mostrar archivo

Archivo: bvh4_builder_twolevel.cpp Proyecto: eyalsoreq/embree

    /* Builds the two-level BVH4 for the current scene.
     *
     * Visible phases:
     *   1. delete per-object BVHs/builders whose index is >= scene->size()
     *   2. reset the allocator; an empty scene yields an empty BVH
     *   3. create and (re)build one object BVH per triangle mesh, collecting
     *      a BuildRef for every non-empty object in refs
     *   4. open large nodes, convert the refs to PrimRefs with a
     *      parallel_reduce that also merges their PrimInfo
     *   5. build the top-level hierarchy over those PrimRefs
     *
     * The threadIndex and threadCount parameters are not used in this body.
     * With PROFILE enabled, everything from the array resize up to the
     * top-level build runs inside the lambda handed to profile(). */
    void BVH4BuilderTwoLevel::build(size_t threadIndex, size_t threadCount) 
    {
      /* delete some objects */
      size_t N = scene->size();
      if (N < objects.size()) {
        parallel_for(N, objects.size(), [&] (const range<size_t>& r) {
            for (size_t i=r.begin(); i<r.end(); i++) {
              delete builders[i]; builders[i] = nullptr;
              delete objects[i]; objects[i] = nullptr;
            }
          });
      }

      /* reset memory allocator */
      bvh->alloc.reset();
      
      /* skip build for empty scene */
      const size_t numPrimitives = scene->getNumPrimitives<TriangleMesh,1>();
      if (numPrimitives == 0) {
        prims.resize(0);
        bvh->set(BVH4::emptyNode,empty,0);
        return;
      }

      double t0 = bvh->preBuild(TOSTRING(isa) "::BVH4BuilderTwoLevel");

      /* NOTE(review): with PROFILE enabled the two fast-path `return`
         statements below leave only the profile lambda, so the cleanup and
         postBuild at the end still run; without PROFILE they leave build()
         and skip both - confirm this is intended */
#if PROFILE
	profile(2,20,numPrimitives,[&] (ProfileTimer& timer)
        {
#endif
          
      /* resize object array if scene got larger */
      if (objects.size()  < N) objects.resize(N);
      if (builders.size() < N) builders.resize(N);
      if (refs.size()     < N) refs.resize(N);
      nextRef = 0;
      
      /* create of acceleration structures */
      parallel_for(size_t(0), N, [&] (const range<size_t>& r) 
      {
        for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
        {
          TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
          
          /* verify meshes got deleted properly */
          if (mesh == nullptr || mesh->numTimeSteps != 1) {
            assert(objectID < objects.size () && objects[objectID] == nullptr);
            assert(objectID < builders.size() && builders[objectID] == nullptr);
            continue;
          }
          
          /* create BVH and builder for new meshes */
          if (objects[objectID] == nullptr)
            createTriangleMeshAccel(mesh,(AccelData*&)objects[objectID],builders[objectID]);
        }
      });

      /* parallel build of acceleration structures */
      parallel_for(size_t(0), N, [&] (const range<size_t>& r) 
      {
        for (size_t objectID=r.begin(); objectID<r.end(); objectID++)
        {
          /* ignore if no triangle mesh or not enabled */
          TriangleMesh* mesh = scene->getTriangleMeshSafe(objectID);
          if (mesh == nullptr || !mesh->isEnabled() || mesh->numTimeSteps != 1) 
            continue;
        
          BVH4*    object  = objects [objectID]; assert(object);
          Builder* builder = builders[objectID]; assert(builder);
          
          /* build object if it got modified */
          /* (with PROFILE enabled the isModified() test is compiled out and
             every object is rebuilt) */
#if !PROFILE 
          if (mesh->isModified()) 
#endif
            builder->build(0,0);
          
          /* create build primitive */
          /* NOTE(review): nextRef is incremented from concurrently running
             loop bodies; presumably it is an atomic counter - confirm against
             its declaration */
          if (!object->bounds.empty())
            refs[nextRef++] = BVH4BuilderTwoLevel::BuildRef(object->bounds,object->root);
        }
      });
      
      /* fast path for single geometry scenes */
      if (nextRef == 1) { 
        bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
        return;
      }

      /* open all large nodes */
      refs.resize(nextRef);
      open_sequential(numPrimitives); 
      
      /* fast path for small geometries */
      if (refs.size() == 1) { 
        bvh->set(refs[0].node,refs[0].bounds(),numPrimitives);
        return;
      }

      /* compute PrimRefs */
      /* each range fills its slice of prims and returns the PrimInfo of that
         slice; the second lambda merges two partial PrimInfos */
      prims.resize(refs.size());
      const PrimInfo pinfo = parallel_reduce(size_t(0), refs.size(), size_t(1024), PrimInfo(empty), [&] (const range<size_t>& r) -> PrimInfo
      {
        PrimInfo pinfo(empty);
        for (size_t i=r.begin(); i<r.end(); i++) {
          pinfo.add(refs[i].bounds());
          prims[i] = PrimRef(refs[i].bounds(),(size_t)refs[i].node);
        }
        return pinfo;
      }, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });

      /* skip if all objects where empty */
      if (pinfo.size() == 0)
        bvh->set(BVH4::emptyNode,empty,0);

      /* otherwise build toplevel hierarchy */
      else
      {
        BVH4::NodeRef root;
        /* callbacks, in order: per-thread allocator, inner-node creation,
           leaf creation, progress report */
        BVHBuilderBinnedSAH::build<BVH4::NodeRef>
          (root,
           [&] { return bvh->alloc.threadLocal2(); },
           [&] (const isa::BVHBuilderBinnedSAH::BuildRecord& current, BVHBuilderBinnedSAH::BuildRecord* children, const size_t N, FastAllocator::ThreadLocal2* alloc) -> int
           {
             /* allocate an inner node, record the child bounds and point each
                child record at its slot in the node */
             BVH4::Node* node = (BVH4::Node*) alloc->alloc0.malloc(sizeof(BVH4::Node)); node->clear();
             for (size_t i=0; i<N; i++) {
               node->set(i,children[i].pinfo.geomBounds);
               children[i].parent = (size_t*)&node->child(i);
             }
             *current.parent = bvh->encodeNode(node);
             return 0;
           },
           [&] (const BVHBuilderBinnedSAH::BuildRecord& current, FastAllocator::ThreadLocal2* alloc) -> int
           {
             /* a leaf holds exactly one PrimRef; its ID is the object root
                node stored when the PrimRefs were computed above */
             assert(current.prims.size() == 1);
             *current.parent = (BVH4::NodeRef) prims[current.prims.begin()].ID();
             return 1;
           },
           [&] (size_t dn) { bvh->scene->progressMonitor(0); },
           prims.data(),pinfo,BVH4::N,BVH4::maxBuildDepthLeaf,4,1,1,1.0f,1.0f);
        
        bvh->set(root,pinfo.geomBounds,numPrimitives);
      }

#if PROFILE
      }); 
#endif

      bvh->alloc.cleanup();
      bvh->postBuild(t0);
    }

Ejemplo n.º 18

0

Mostrar archivo

Archivo: TBBCode.cpp Proyecto: Chalenko/PCM

double ParallelTBBSclMlt(const double* A, const double* B, const int len){
	ScalarMultiplicator mul(A, B);
	parallel_reduce(blocked_range<int>(0, len), mul);
	return mul.Result();
}

Ejemplo n.º 19

0

Mostrar archivo

Archivo: main.cpp Proyecto: ellcom/tbb

double parallelMean(double *a) {
    return parallel_reduce(blocked_range<double*>(a,a+vec_size),0.0,
                           [](const blocked_range<double*>& r, double value)->double {
                               return accumulate(r.begin(),r.end(),value);
                           },plus<double>())/vec_size;
}

Ejemplo n.º 20

0

Mostrar archivo

Archivo: Kokkos_Bitset_impl.hpp Proyecto: HuiWritingFoo/kokkos

 /// Reduces this functor over m_bitset.m_blocks.dimension_0() work items and
 /// returns the value parallel_reduce wrote into the accumulator.
 size_type apply() const
 {
   size_type total = 0u;

   parallel_reduce(m_bitset.m_blocks.dimension_0(), *this, total);

   return total;
 }

Ejemplo n.º 21

0

Mostrar archivo

Archivo: bbox.cpp Proyecto: ibaned/omega_h

/// Reduces a BBoxFunctor<dim> built from `coords` over coords.size() / dim
/// work items and returns the resulting BBox<dim>.
/// CHECKs that the coordinate count is a multiple of dim.
BBox<dim> find_bounding_box(Reals coords) {
  CHECK(coords.size() % dim == 0);

  auto const npoints = coords.size() / dim;
  return parallel_reduce(npoints, BBoxFunctor<dim>(coords));
}

Ejemplo n.º 22

0

Mostrar archivo

Archivo: TBBCode.cpp Proyecto: Chalenko/PCM

double ParallelTBBSum(const double* src, const int len){
	VectorSummator sum(src);
	parallel_reduce(blocked_range<int>(0, len), sum);
	return sum.Result();
}