Пример #1
0
 /// Stores `pointer` in m_ptr, hands this functor to parallel_for with a
 /// range policy covering [0, capacity) on ExecSpace, and then fences
 /// ExecSpace before the constructor returns.
 ViewDefaultConstruct( type * pointer , size_t capacity )
   : m_ptr( pointer )
 {
   typedef Kokkos::RangePolicy< ExecSpace > range_policy ;

   const range_policy whole_range( 0 , capacity );
   parallel_for( whole_range , *this );

   // Do not return until the execution space reports completion.
   ExecSpace::fence();
 }
  /*! \brief Copies a caller-supplied region of voxels into this volume.

      \param source points to the first voxel to be copied. The voxels at
             'source' MUST have dimensions 'regionSize', must be organized
             in 3D-array order, and must have the same voxel type as the
             volume.
      \param regionCoords coordinates of the lower, left, front corner of
             the target region.
      \param regionSize size of the region that we're writing to, MUST be
             the same as the dimensions of source[][][].
      \return always 1.
      \throws std::runtime_error if the voxel range has to be computed here
              and 'voxelType' is none of "uchar", "ushort", "float" or
              "double". */
  int GhostBlockBrickedVolume::setRegion(const void *source,
                                         const vec3i &regionCoords,
                                         const vec3i &regionSize)
  {
    // Create the equivalent ISPC volume container and allocate memory for voxel
    // data.
    if (ispcEquivalent == nullptr)
      createEquivalentISPC();

    Assert2(source,"nullptr source in GhostBlockBrickedVolume::setRegion()");

#ifndef OSPRAY_VOLUME_VOXELRANGE_IN_APP
    /*! \todo check if we still need this 'computevoxelrange' - in
        theory we need this only if the app is allowed to query these
        values, and they're not being set in sharedstructuredvolume,
        either, so should we actually set them at all!? */
    // Compute the voxel value range from the incoming data if none was
    // previously specified.
    if (findParam("voxelRange") == nullptr) {
      // Total number of voxels in the region: the product of its three
      // extents, widened to size_t before multiplying.
      const size_t numVoxelsInRegion = static_cast<size_t>(regionSize.x)
                                     * static_cast<size_t>(regionSize.y)
                                     * static_cast<size_t>(regionSize.z);

      if (voxelType == "uchar")
        computeVoxelRange((unsigned char *)source, numVoxelsInRegion);
      else if (voxelType == "ushort")
        computeVoxelRange((unsigned short *)source, numVoxelsInRegion);
      else if (voxelType == "float")
        computeVoxelRange((float *)source, numVoxelsInRegion);
      else if (voxelType == "double")
        computeVoxelRange((double *)source, numVoxelsInRegion);
      else {
        throw std::runtime_error("invalid voxelType in "
                                 "GhostBlockBrickedVolume::setRegion()");
      }
    }
#endif

    // Copy voxel data into the volume using regionSize.y * regionSize.z
    // tasks; how a task index maps onto the region is decided inside
    // ispc::GBBV_setRegion.
    const int NTASKS = regionSize.y * regionSize.z;

    parallel_for(NTASKS, [&](int taskIndex){
        ispc::GBBV_setRegion(ispcEquivalent,
                             source,
                             (const ispc::vec3i &)regionCoords,
                             (const ispc::vec3i &)regionSize,
                             taskIndex);
    });

    return 1;
  }
Пример #3
0
  /// Builds an Update functor from (alpha, x, beta, y) and passes it to
  /// parallel_for with x.dimension_0() as the iteration count.
  static void apply( const value_type  & alpha ,
                     const vector_type & x ,
                     const value_type  & beta ,
                     const vector_type & y )
  {
    // Iteration count is taken from the leading extent of x.
    const size_t num_rows = x.dimension_0() ;

    parallel_for( num_rows , Update( alpha , x , beta , y ) );
  }
Пример #4
0
 /// Task-launch entry point: invokes `func` (cast to TaskFuncType) once for
 /// every task index in [0, count), distributing index ranges through
 /// parallel_for. `taskPtr` is not used by this implementation.
 __dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count) 
 {
   // Recover the typed task entry point once, outside the parallel region.
   const TaskFuncType task = (TaskFuncType)func;

   parallel_for(size_t(0), size_t(count), [&] (const range<size_t>& chunk) {
       // Thread index/count are queried from TBB once per chunk.
       const size_t threadIndex = tbb::task_arena::current_thread_index();
       const size_t threadCount = tbb::task_scheduler_init::default_num_threads();

       size_t taskIndex = chunk.begin();
       while (taskIndex < chunk.end()) {
         task(data,threadIndex,threadCount,taskIndex,count);
         ++taskIndex;
       }
     });
 }
Пример #5
0
// Overload for eager binary splitting with prediction-based control that
// takes no explicit cutoff function: it supplies a placeholder cutoff and
// forwards to the overload that accepts one. The placeholder calls todo()
// and then returns false.
void parallel_for(loop_by_eager_binary_splitting<control_by_prediction>& lpalgo,
                  const Loop_complexity_measure_fct& loop_compl_fct,
                  Number lo, Number hi, const Body& body) {
  auto placeholder_cutoff = [] (Number /*range_lo*/, Number /*range_hi*/) -> bool {
    todo();
    return false;
  };
  parallel_for(lpalgo, placeholder_cutoff, loop_compl_fct, lo, hi, body);
}
Пример #6
0
void axpby( const ConstScalarType & alpha ,
            const ConstVectorType & X ,
            const ConstScalarType & beta ,
            const      VectorType & Y )
{
  typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;

  parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
}
Пример #7
0
// Runs a primary_ray_sampler over samples.size() work items via
// parallel_for. The sampler is built from the camera and the begin()
// pointers of the sample, ray and ray-differential buffers; use_gpu is
// forwarded to parallel_for unchanged.
void sample_primary_rays(const Camera &camera,
                         const BufferView<CameraSample> &samples,
                         BufferView<Ray> rays,
                         BufferView<RayDifferential> ray_differentials,
                         bool use_gpu) {
    parallel_for(
        primary_ray_sampler{camera,
                            samples.begin(),
                            rays.begin(),
                            ray_differentials.begin()},
        samples.size(),
        use_gpu);
}
Пример #8
0
 /// Captures the matrix A and the vectors x and y, then immediately passes
 /// this functor to parallel_for with nrow as the iteration count. The
 /// third (column count) argument is accepted but ignored.
 Multiply( const matrix_type & A ,
           const size_type     nrow ,
           const size_type     /* ncol */ ,
           const vector_type & x ,
           const vector_type & y )
   : m_A( A )
   , m_x( x )
   , m_y( y )
 {
   parallel_for( nrow , *this );
 }
Пример #9
0
	void testThreadedGet()
	{

		Cache cache( get, hash, 10000, new ObjectPool(10000) );
		
		parallel_for( blocked_range<size_t>( 0, 10000 ), GetFromCache( cache ) );

		BOOST_CHECK_EQUAL( size_t(500), cache.cachedComputations() );
	}
Пример #10
0
 /// Task-launch entry point: invokes `func` (cast to TaskFuncType) once for
 /// every task index in [0, count), distributing index ranges through
 /// parallel_for. Thread index and thread count come from TaskSchedulerNew.
 /// `taskPtr` is not used by this implementation.
 __dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count) 
 {
   // Recover the typed task entry point once, outside the parallel region.
   const TaskFuncType task = (TaskFuncType)func;

   parallel_for(size_t(0), size_t(count), [&] (const range<size_t>& chunk) {
       const size_t threadIndex = TaskSchedulerNew::thread()->threadIndex;
       const size_t threadCount = TaskSchedulerNew::threadCount();

       size_t taskIndex = chunk.begin();
       while (taskIndex < chunk.end()) {
         task(data,threadIndex,threadCount,taskIndex,count);
         ++taskIndex;
       }
     });
 }
Пример #11
0
 /// C-linkage task-launch entry point: invokes `func` (cast to
 /// ISPCTaskFunc) once for every task index in [0, count), distributing
 /// index ranges through parallel_for. Thread index and thread count come
 /// from TaskScheduler and are narrowed to int. `taskPtr` is not used by
 /// this implementation.
 extern "C" __dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count)
 {
   // Recover the typed task entry point once, outside the parallel region.
   const ISPCTaskFunc task = (ISPCTaskFunc)func;

   parallel_for(0, count, [&] (const range<int>& chunk) {
       const int threadIndex = (int) TaskScheduler::threadIndex();
       const int threadCount = (int) TaskScheduler::threadCount();

       int taskIndex = chunk.begin();
       while (taskIndex < chunk.end()) {
         task(data,threadIndex,threadCount,taskIndex,count);
         ++taskIndex;
       }
     });
 }
Пример #12
0
 inline static
 void apply( const size_t n ,
             const double          alpha ,
             const scalar_vector & w )
 {
   FILL op ;
   op.w = w ;
   op.alpha = alpha ;
   parallel_for( n , op );
 }
Пример #13
0
// Runs serial_read_files over `files` against `db` through parallel_for
// with the requested concurrency, then inspects the returned flags. If any
// flag is set, a message is written to std::cerr and the process exits
// with status 1.
void put_all_files(leveldb::DB* db, const std::vector<std::string>& files,
                   int concurrency) {
  auto results = parallel_for(concurrency, serial_read_files, files, db);

  for (bool failed : results) {
    if (!failed)
      continue;

    std::cerr << "Errors occured!" << std::endl;
    std::exit(1);
  }
}
Пример #14
0
// dense matrix by dense vector multiplication: d := m * v
// r: # rows in m; c: # columns in m
// m is indexed as r consecutive rows of c floats each (row i starts at
// m[i*c]); one result d[i] is written per row.
static
void dmdvmult(long r, long c, const float* m, const float* v, float* d) {
  // Cost estimate handed to the outer loop: c units of work per row.
  auto outer_loop_compl_fct = [c] (long lo, long hi) {
    return (hi - lo) * c;
  };
  parallel_for(outerlp, outer_loop_compl_fct, 0l, r, [&] (long i) {
    const float* row_i = &m[i*c];
    // A row holds c entries, so the dot product must span c elements (not
    // r): using the row count would over-read or under-sum whenever the
    // matrix is not square.
    // NOTE(review): assumes ddotprod's second argument is the element
    // count - confirm against its declaration.
    d[i] = ddotprod(innerlp, c, 0.0f, row_i, v);
  });
}
Пример #15
0
 // Rebuilds the "native" curve index and vertex buffers from `curves` and
 // `vertices`:
 //  1. if the index buffer size is stale, reallocate it and fill it with
 //     4*i for valid curves or a sentinel for invalid ones;
 //  2. make native_vertices hold one buffer per entry of `vertices`;
 //  3. for every entry of `vertices`, convert each valid curve's four
 //     control points and store them at slots 4*j .. 4*j+3;
 //  4. cache native_vertices[0] in native_vertices0.
 // NOTE(review): step 4 indexes element 0 unconditionally - presumably
 // `vertices` is never empty here; confirm against callers.
 void NativeCurvesISA::commit_helper()
 {
   // (1) Index buffer: only rebuilt when its size no longer matches `curves`.
   if (native_curves.size() != curves.size()) 
   {
     native_curves = APIBuffer<unsigned>(scene->device,curves.size(),sizeof(unsigned int),true);
     parallel_for(size_t(0), curves.size(), size_t(1024), [&] ( const range<size_t> r) {
         for (size_t i=r.begin(); i<r.end(); i++) {
           if (curves[i]+3 >= numVertices()) native_curves[i] = 0xFFFFFFF0; // invalid curves stay invalid this way
           else                              native_curves[i] = unsigned(4*i);
         }
       });
   }
   
   // (2) One native vertex buffer per entry of `vertices`.
   if (native_vertices.size() != vertices.size())
     native_vertices.resize(vertices.size());
   
   // (3) Outer loop over entries of `vertices`, inner loop over curves.
   parallel_for(vertices.size(), [&] ( const size_t i ) {
       
       // Each curve contributes four output vertices.
       if (native_vertices[i].size() != 4*curves.size())
         native_vertices[i] = APIBuffer<Vec3fa>(scene->device,4*curves.size(),sizeof(Vec3fa),true);
       
       parallel_for(size_t(0), curves.size(), size_t(1024), [&] ( const range<size_t> rj ) {
           
           for (size_t j=rj.begin(); j<rj.end(); j++)
           {
             // `id` is the index of the curve's first control point; the
             // curve reads the four consecutive vertices id .. id+3.
             const unsigned id = curves[j];
             if (id+3 >= numVertices()) continue; // ignore invalid curves
             const Vec3fa v0 = vertices[i][id+0];
             const Vec3fa v1 = vertices[i][id+1];
             const Vec3fa v2 = vertices[i][id+2];
             const Vec3fa v3 = vertices[i][id+3];
             // Convert from the input curve representation to the output one.
             const InputCurve3fa icurve(v0,v1,v2,v3);
             OutputCurve3fa ocurve; convert<Vec3fa>(icurve,ocurve);
             // Slot layout matches the 4*i indices written to native_curves.
             native_vertices[i].store(4*j+0,ocurve.v0);
             native_vertices[i].store(4*j+1,ocurve.v1);
             native_vertices[i].store(4*j+2,ocurve.v2);
             native_vertices[i].store(4*j+3,ocurve.v3);
           }
         });
     });
   // (4) Keep a direct handle to the first native vertex buffer.
   native_vertices0 = native_vertices[0];
 }
  static void apply( const mesh_type  & mesh ,
                     const elem_matrices_type & elem_matrices ,
                     const elem_vectors_type  & elem_vectors ,
                     const scalar_type  elem_coeff_K ,
                     const scalar_type  elem_coeff_Q )
  {
    ElementComputation comp( mesh , elem_matrices , elem_vectors , elem_coeff_K , elem_coeff_Q );
    const size_t elem_count = mesh.elem_node_ids.dimension_0();

    parallel_for( elem_count , comp );
  }
Пример #17
0
// Exercises nested use of parallel_for: the default thread pool is resized
// to 2, an outer parallel_for over [0, 10) is started, and each outer
// iteration sleeps briefly and then launches its own inner parallel_for
// over [0, 10) whose iterations print under a shared spin lock.
void
test_thread_pool_recursion ()
{
    std::cout << "\nTesting thread pool recursion" << std::endl;

    // Serializes the inner iterations' output.
    static spin_mutex print_mutex;

    thread_pool *pool = default_thread_pool();
    pool->resize (2);

    parallel_for (0, 10, [&](int /*outer_id*/, int64_t /*outer_i*/){
        // sleep long enough that we can push all the jobs before any get
        // done.
        Sysutil::usleep (10);

        // then run something else that itself will push jobs onto the
        // thread pool queue.
        parallel_for (0, 10, [&](int id, int64_t /*inner_i*/){
            Sysutil::usleep (2);
            spin_lock guard (print_mutex);
            std::cout << "  recursive running thread " << id << std::endl;
        });
    });
}
Пример #18
0
// Executes every plan in `stuff_to_run_in_parallel` through the
// single-plan run() overload, one parallel_for iteration per plan, and
// returns the resulting sframes in the same order as the inputs.
std::vector<sframe> subplan_executor::run(
    const std::vector<std::shared_ptr<planner_node>>& stuff_to_run_in_parallel,
    const materialize_options& exec_params) {

  // Pre-sized so each iteration writes only its own slot.
  std::vector<sframe> results(stuff_to_run_in_parallel.size());

  parallel_for(0, stuff_to_run_in_parallel.size(), [&](const size_t idx) {
      results[idx] = run(stuff_to_run_in_parallel[idx], exec_params);
  });

  return results;
}
Пример #19
0
 // Registers a lambda on every worker client and returns its identifier.
 // The identifier is std::hash of the full input string. A leading "LUA"
 // prefix, if present, is stripped from the text sent to the clients. Each
 // client first executes that text and then executes an assignment binding
 // `__lambda__transfer__` to the global `lambda<hash>`.
 size_t lualambda_master::make_lambda(const std::string& lambda_str) {
   const size_t lambda_hash = std::hash<std::string>()(lambda_str);

   // Text to execute: the input without its optional "LUA" prefix.
   const std::string source =
       boost::starts_with(lambda_str,"LUA") ? lambda_str.substr(3) : lambda_str;

   // The binding statement is the same for every client.
   const std::string binding =
       "lambda" + std::to_string(lambda_hash) + " = __lambda__transfer__";

   parallel_for (0, num_workers(), [&](size_t worker) {
     clients[worker]->doString(source);
     clients[worker]->doString(binding);
   });

   return lambda_hash;
 }
Пример #20
0
void FractalRenderer::performRendering(void)
{
	printf("data=%p, width=%d, heigth=%d, zoom=%f, resolution=%d, posx=%f, posy=%f\n",
		   m_data, m_image_x, m_image_y, m_scale, m_resolution, m_normalizedPosition.x, m_normalizedPosition.y);
	
	sf::Clock timer;
	parallel_for(tbb::blocked_range2d<unsigned, unsigned>(0, m_image_x, 50, 0, m_image_y, 50),
				 MandelbrotRenderer(m_data, m_image_x, m_image_y, m_scale, m_resolution, m_normalizedPosition));
	
	m_texture.update(m_data);
	m_lastRenderingTime = timer.getElapsedTime();
}
Пример #21
0
void reduce_floats() {
  srand(time(NULL));
  const int numStars = 5000;
  star* array_of_structs = new star[numStars];
  stars struct_of_arrays(numStars);
  populate_star_array(array_of_structs, numStars);
  populate_stars(&struct_of_arrays);

  cout << "map pattern with arrays-of-structures" << endl;
  cout << "and structures-of-arrays" << endl;
  cout << "----------------------------------------" << endl;
  cout << "calculating longitude from right ascension of stars" << endl;

  auto get_longitude = [](const double ascension) {
    const double degrees = ascension * 15.0;
    const double adjusted_degrees = degrees > 360.0 ? degrees - 180.0 : degrees;
    return adjusted_degrees;
  };

  double output[numStars];
  const int numIterations = 10;

  clock_t start = clock();
  for(int i = 0, end = numIterations; i != end; ++i) {
    parallel_for(0, numStars, 1, [&struct_of_arrays, &output, get_longitude](int i) {
        output[i] = get_longitude(struct_of_arrays.ascension[i]);
      });
  }
  double seconds = (clock() - start) / (double)CLOCKS_PER_SEC;
  cout << "structure-of-arrays: " << seconds << " seconds." << endl;

  start = clock();
  for(int i = 0, end = numIterations; i != end; ++i) {
    parallel_for(0, numStars, 1, [&array_of_structs, &output, get_longitude](int i) {
        output[i] = get_longitude(array_of_structs[i].ascension);
      });
  }
  seconds = (clock() - start) / (double)CLOCKS_PER_SEC;
  cout << "array-of-structures: " << seconds << " seconds." << endl;
}
Пример #22
0
  void assign (size_t n, const Scalar& val) {

    /* Resize if necessary (behavour of std:vector) */

    if(n>capacity())
      DV::resize(size_t (n*_extra_storage));
    _size = n;

          /* Assign value either on host or on device */

    if( DV::modified_host >= DV::modified_device ) {
      set_functor_host f(DV::h_view,val);
      parallel_for(n,f);
      DV::t_host::device_type::fence();
      DV::modified_host++;
    } else {
      set_functor f(DV::d_view,val);
      parallel_for(n,f);
      DV::t_dev::device_type::fence();
      DV::modified_device++;
    }
  }
Пример #23
0
int main(int argc, char** argv) {
    if(argc != 2) {
        printf("Usage: %s <password hash>\n",argv[0]);
        return 1;
    }
    
    int notfound=1;
    SpaceSearcher s( argv[1], &notfound );

    parallel_for( tbb::blocked_range<long>( 0, SEARCH_SPACE ), s );

    return 0;
}
Пример #24
0
/* called by the C++ code to render */
/* Splits the width x height image into TILE_SIZE_X x TILE_SIZE_Y tiles
   (tile counts rounded up) and calls renderTileTask once per tile index,
   distributing tile-index ranges through parallel_for. */
extern "C" void device_render (int* pixels,
                    const unsigned int width,
                    const unsigned int height,
                    const float time,
                    const ISPCCamera& camera)
{
  /* ceil(width / TILE_SIZE_X) and ceil(height / TILE_SIZE_Y) */
  const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X;
  const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y;
  const size_t numTiles = size_t(numTilesX*numTilesY);

  parallel_for(size_t(0),numTiles,[&](const range<size_t>& tiles) {
    size_t tile = tiles.begin();
    while (tile < tiles.end()) {
      renderTileTask((int)tile,pixels,width,height,time,camera,numTilesX,numTilesY);
      tile++;
    }
  }); 
}
Пример #25
0
// Builds an array of n + 1 entries from weight(0) .. weight(n-1) and
// returns it; the last entry receives the overall total.
//
// The sequential path delegates entirely to weights_seq. The parallel path
// (taken for n > 1024) works in blocks of 1024 indices: it sums the weights
// of each block, feeds the block sums to rec(), and then runs weights_seq
// per block starting from the corresponding entry of rec()'s result.
// NOTE(review): presumably rec() yields the exclusive prefix sums of the
// block sums and weights_seq writes running totals and returns the final
// one - confirm against their definitions.
parray<long> weights(long n, const Weight& weight) {
  const long block_size = 1024;
  long num_blocks = 1 + ((n - 1) / block_size);
  long total;
  parray<long> result(n + 1);
#ifdef MANUAL_CONTROL
  // Small inputs bypass the controlled statement altogether.
  if (n < WEIGHTS_THRESHOLD) {
    total = weights_seq(weight, 0, n, 0, result.begin());
    result[n] = total;
    return result;
  }
#endif
  par::cstmt(weights_contr<>::weights, [&] { return n; }, [&] {
    if (n > block_size) {
      // Pass 1: one partial sum per block.
      parray<long> block_sums(num_blocks);
      parallel_for(0l, num_blocks, [&] (long b) {
        long first = b * block_size;
        long last = std::min(first + block_size, n);
        block_sums[b] = 0;
        for (long idx = first; idx < last; idx++) {
          block_sums[b] += weight(idx);
        }
      });
      // Pass 2: per-block starting values, then fill each block.
      parray<long> block_starts = rec(block_sums);
      parallel_for(0l, num_blocks, [&] (long b) {
        long first = b * block_size;
        long last = std::min(first + block_size, n);
        weights_seq(weight, first, last, block_starts[b], result.begin()+first);
      });
      total = result[n-1] + weight(n-1);
    } else {
      total = weights_seq(weight, 0, n, 0, result.begin());
    }
  }, [&] {
    // Sequential alternative body.
    total = weights_seq(weight, 0, n, 0, result.begin());
  });
  result[n] = total;
  return result;
}
Пример #26
0
 inline
 static
 void unpack( const array_type  & arg_output ,
              const buffer_type & arg_input ,
              const size_type     arg_begin ,
              const size_type     arg_count )
 {
   UnpackArray op ;
   op.output = arg_output ;
   op.input  = arg_input ;
   op.base   = arg_begin ;
   parallel_for( arg_count , op );
 }
Пример #27
0
// Resets every per-cluster size counter and every running total to zero,
// then runs assign_closest_serial through parallel_for with the requested
// concurrency. The centroids are passed as a const reference wrapper; the
// totals and cluster sizes are passed as mutable reference wrappers.
void assign_closest(std::vector<IterRange>& parvec,
                    vuad& totals,
                    int K,
                    leveldb::DB* work_db,
                    const std::vector<GDELTMini>& centroids,
                    int concurrency,
                    vuai& cluster_sizes) {
  // Clear the accumulators before the parallel pass.
  for (auto& size_counter : cluster_sizes) {
    size_counter->store(0);
  }
  for (auto& running_total : totals) {
    running_total->store(0);
  }

  parallel_for(concurrency,
               assign_closest_serial,
               parvec,
               K,
               work_db,
               std::cref(centroids),
               std::ref(totals),
               std::ref(cluster_sizes));
}
Пример #28
0
 /// Captures the output and input views and, for each of the eight
 /// dimensions, records the smaller of the two views' extents (compared as
 /// size_t). It then passes this functor to parallel_for with n0 as the
 /// iteration count.
 ViewRemap( const OutputView & arg_out , const InputView & arg_in )
   : output( arg_out )
   , input( arg_in )
   , n0( std::min< size_t >( arg_out.dimension_0() , arg_in.dimension_0() ) )
   , n1( std::min< size_t >( arg_out.dimension_1() , arg_in.dimension_1() ) )
   , n2( std::min< size_t >( arg_out.dimension_2() , arg_in.dimension_2() ) )
   , n3( std::min< size_t >( arg_out.dimension_3() , arg_in.dimension_3() ) )
   , n4( std::min< size_t >( arg_out.dimension_4() , arg_in.dimension_4() ) )
   , n5( std::min< size_t >( arg_out.dimension_5() , arg_in.dimension_5() ) )
   , n6( std::min< size_t >( arg_out.dimension_6() , arg_in.dimension_6() ) )
   , n7( std::min< size_t >( arg_out.dimension_7() , arg_in.dimension_7() ) )
   {
     parallel_for( n0 , *this );
   }
Пример #29
0
// Multi-image fast non-local-means denoising entry point.
// Extracts the input images, validates the arguments through
// fastNlMeansDenoisingMultiCheckPreconditions, allocates the destination
// with the size and type of the first input image, and then runs
// FastNlMeansMultiDenoisingInvoker over the rows of that image with
// parallel_for. The invoker's pixel type is chosen from the first image's
// type: CV_8U -> uchar, CV_8UC2 -> Vec2b, CV_8UC3 -> Vec3b; any other type
// raises CV_StsBadArg through CV_Error.
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
                                    int imgToDenoiseIndex, int temporalWindowSize,
                                    float h, int templateWindowSize, int searchWindowSize)
{
    std::vector<Mat> srcImgs;
    _srcImgs.getMatVector(srcImgs);

    fastNlMeansDenoisingMultiCheckPreconditions(
        srcImgs, imgToDenoiseIndex,
        temporalWindowSize, templateWindowSize, searchWindowSize
    );

    _dst.create(srcImgs[0].size(), srcImgs[0].type());
    Mat dst = _dst.getMat();

    const int pixelType = srcImgs[0].type();

    if (pixelType == CV_8U) {
        parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
            FastNlMeansMultiDenoisingInvoker<uchar>(
                srcImgs, imgToDenoiseIndex, temporalWindowSize,
                dst, templateWindowSize, searchWindowSize, h));
    }
    else if (pixelType == CV_8UC2) {
        parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
            FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
                srcImgs, imgToDenoiseIndex, temporalWindowSize,
                dst, templateWindowSize, searchWindowSize, h));
    }
    else if (pixelType == CV_8UC3) {
        parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
            FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
                srcImgs, imgToDenoiseIndex, temporalWindowSize,
                dst, templateWindowSize, searchWindowSize, h));
    }
    else {
        CV_Error(CV_StsBadArg,
            "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
    }
}
// Calls `func` once for every task index in [0, taskSize), distributing
// index ranges through parallel_for. All remaining arguments are forwarded
// to `func` unchanged on every call.
void launch_animateSphere(animateSphereFunc func,
			  int taskSize,
			  Vertex* vertices, 
			  const float rcpNumTheta,
			  const float rcpNumPhi,
			  const Vec3fa& pos, 
			  const float r,
			  const float f)
{
  parallel_for(size_t(0),size_t(taskSize),[&] (const range<size_t>& chunk) {
      size_t task = chunk.begin();
      while (task < chunk.end()) {
        func(task,vertices,rcpNumTheta,rcpNumPhi,pos,r,f);
        task++;
      }
    });
}