ViewDefaultConstruct( type * pointer , size_t capacity )
  : m_ptr( pointer )
{
  Kokkos::RangePolicy< ExecSpace > range( 0 , capacity );
  parallel_for( range , *this );
  ExecSpace::fence();
}
int GhostBlockBrickedVolume::setRegion(
    // Points to the first voxel to be copied. The voxels at 'source' MUST
    // have dimensions 'regionSize', must be organized in 3D-array order, and
    // must have the same voxel type as the volume.
    const void *source,
    // Coordinates of the lower, left, front corner of the target region.
    const vec3i &regionCoords,
    // Size of the region that we're writing to; MUST be the same as the
    // dimensions of source[][][].
    const vec3i &regionSize)
{
  // Create the equivalent ISPC volume container and allocate memory for
  // voxel data.
  if (ispcEquivalent == nullptr)
    createEquivalentISPC();

  /*! \todo Check if we still need this 'computeVoxelRange' - in theory we
      need it only if the app is allowed to query these values, and they're
      not being set in SharedStructuredVolume either, so should we actually
      set them at all!? */

  Assert2(source, "nullptr source in GhostBlockBrickedVolume::setRegion()");

#ifndef OSPRAY_VOLUME_VOXELRANGE_IN_APP
  if (findParam("voxelRange") == NULL) {
    // Compute the voxel value range if none was previously specified.
    const size_t numVoxelsInRegion
      = (size_t)regionSize.x
      * (size_t)regionSize.y
      * (size_t)regionSize.z;
    if (voxelType == "uchar")
      computeVoxelRange((unsigned char *)source, numVoxelsInRegion);
    else if (voxelType == "ushort")
      computeVoxelRange((unsigned short *)source, numVoxelsInRegion);
    else if (voxelType == "float")
      computeVoxelRange((float *)source, numVoxelsInRegion);
    else if (voxelType == "double")
      computeVoxelRange((double *)source, numVoxelsInRegion);
    else {
      throw std::runtime_error("invalid voxelType in "
                               "GhostBlockBrickedVolume::setRegion()");
    }
  }
#endif

  // Copy voxel data into the volume, one task per (y,z) line of the region.
  const int NTASKS = regionSize.y * regionSize.z;
  parallel_for(NTASKS, [&](int taskIndex) {
    ispc::GBBV_setRegion(ispcEquivalent,
                         source,
                         (const ispc::vec3i &)regionCoords,
                         (const ispc::vec3i &)regionSize,
                         taskIndex);
  });

  return true;
}
static void apply( const value_type & alpha ,
                   const vector_type & x ,
                   const value_type & beta ,
                   const vector_type & y )
{
  const size_t row_count = x.dimension_0();
  parallel_for( row_count , Update( alpha , x , beta , y ) );
}
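// A minimal sketch of the Update functor shape this call expects, assuming
// Kokkos-style element-wise update semantics (y(i) = alpha*x(i) + beta*y(i)).
// The member layout is an illustrative assumption, not the original source:
struct Update {
  value_type alpha , beta ;
  vector_type x , y ;
  Update( const value_type & arg_alpha , const vector_type & arg_x ,
          const value_type & arg_beta  , const vector_type & arg_y )
    : alpha( arg_alpha ), beta( arg_beta ), x( arg_x ), y( arg_y ) {}
  KOKKOS_INLINE_FUNCTION
  void operator()( const size_t i ) const
  { y(i) = alpha * x(i) + beta * y(i) ; }
};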
__dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count)
{
  parallel_for(size_t(0), size_t(count), [&] (const range<size_t>& r) {
    const size_t threadIndex = tbb::task_arena::current_thread_index();
    const size_t threadCount = tbb::task_scheduler_init::default_num_threads();
    for (size_t i=r.begin(); i<r.end(); i++)
      ((TaskFuncType)func)(data,threadIndex,threadCount,i,count);
  });
}
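// Assumed signature of the ISPC task entry point invoked above, inferred
// from the call site rather than taken from the original headers:
typedef void (*TaskFuncType)(void* data,
                             size_t threadIndex,
                             size_t threadCount,
                             size_t taskIndex,
                             size_t taskCount);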
void parallel_for(loop_by_eager_binary_splitting<control_by_prediction>& lpalgo,
                  const Loop_complexity_measure_fct& loop_compl_fct,
                  Number lo, Number hi, const Body& body) {
  auto loop_cutoff_fct = [] (Number lo, Number hi) {
    todo();
    return false;
  };
  parallel_for(lpalgo, loop_cutoff_fct, loop_compl_fct, lo, hi, body);
}
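// For illustration, a concrete cutoff predicate of the shape expected above
// (an assumption, not the original code): report that a subrange is small
// enough to run sequentially once it falls below a fixed grain size.
auto example_loop_cutoff_fct = [] (Number lo, Number hi) {
  return hi - lo <= 1024; // sequentialize small subranges
};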
void axpby( const ConstScalarType & alpha ,
            const ConstVectorType & X ,
            const ConstScalarType & beta ,
            const VectorType & Y )
{
  typedef AXPBY< ConstScalarType , ConstVectorType , VectorType > functor ;
  parallel_for( Y.dimension_0() , functor( alpha , X , beta , Y ) );
}
void sample_primary_rays(const Camera &camera,
                         const BufferView<CameraSample> &samples,
                         BufferView<Ray> rays,
                         BufferView<RayDifferential> ray_differentials,
                         bool use_gpu) {
    parallel_for(primary_ray_sampler{
        camera, samples.begin(), rays.begin(), ray_differentials.begin()},
        samples.size(), use_gpu);
}
Multiply( const matrix_type & A ,
          const size_type nrow ,
          const size_type , // ncol
          const vector_type & x ,
          const vector_type & y )
  : m_A( A ), m_x( x ), m_y( y )
{
  parallel_for( nrow , *this );
}
void testThreadedGet()
{
  Cache cache( get, hash, 10000, new ObjectPool( 10000 ) );
  parallel_for( blocked_range<size_t>( 0, 10000 ), GetFromCache( cache ) );
  BOOST_CHECK_EQUAL( size_t( 500 ), cache.cachedComputations() );
}
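// A sketch of the body functor shape TBB's parallel_for expects here; this
// is an assumption, not the original test fixture. The i % 500 key pattern
// is hypothetical, chosen to be consistent with the expected count of 500
// cached computations across 10000 lookups:
struct GetFromCache
{
  GetFromCache( Cache &cache ) : m_cache( cache ) {}
  void operator()( const blocked_range<size_t> &r ) const
  {
    for( size_t i = r.begin(); i != r.end(); ++i )
      m_cache.get( i % 500 );
  }
  private :
    Cache &m_cache;
};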
__dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count)
{
  parallel_for(size_t(0), size_t(count), [&] (const range<size_t>& r) {
    const size_t threadIndex = TaskSchedulerNew::thread()->threadIndex;
    const size_t threadCount = TaskSchedulerNew::threadCount();
    for (size_t i=r.begin(); i<r.end(); i++)
      ((TaskFuncType)func)(data,threadIndex,threadCount,i,count);
  });
}
extern "C" __dllexport void ISPCLaunch(void** taskPtr, void* func, void* data, int count) { parallel_for(0, count,[&] (const range<int>& r) { const int threadIndex = (int) TaskScheduler::threadIndex(); const int threadCount = (int) TaskScheduler::threadCount(); for (int i=r.begin(); i<r.end(); i++) ((ISPCTaskFunc)func)(data,threadIndex,threadCount,i,count); }); }
inline static void apply( const size_t n ,
                          const double alpha ,
                          const scalar_vector & w )
{
  FILL op ;
  op.w = w ;
  op.alpha = alpha ;
  parallel_for( n , op );
}
void put_all_files(leveldb::DB* db, const std::vector<std::string>& files,
                   int concurrency) {
  auto errors = parallel_for(concurrency, serial_read_files, files, db);
  for (bool err : errors)
    if (err) {
      std::cerr << "Errors occurred!" << std::endl;
      std::exit(1);
    }
}
// dense matrix by dense vector multiplication: d := m * v
// r: # rows in m; c: # columns in m
static void dmdvmult(long r, long c, const float* m, const float* v, float* d) {
  auto outer_loop_compl_fct = [c] (long lo, long hi) {
    return (hi - lo) * c;
  };
  parallel_for(outerlp, outer_loop_compl_fct, 0l, r, [&] (long i) {
    const float* row_i = &m[i*c];
    // each row of m has c entries, so the inner dot product runs over c
    d[i] = ddotprod(innerlp, c, 0.0f, row_i, v);
  });
}
void NativeCurvesISA::commit_helper()
{
  if (native_curves.size() != curves.size())
  {
    native_curves = APIBuffer<unsigned>(scene->device,curves.size(),sizeof(unsigned int),true);
    parallel_for(size_t(0), curves.size(), size_t(1024), [&] ( const range<size_t> r) {
      for (size_t i=r.begin(); i<r.end(); i++) {
        if (curves[i]+3 >= numVertices())
          native_curves[i] = 0xFFFFFFF0; // invalid curves stay invalid this way
        else
          native_curves[i] = unsigned(4*i);
      }
    });
  }

  if (native_vertices.size() != vertices.size())
    native_vertices.resize(vertices.size());

  parallel_for(vertices.size(), [&] ( const size_t i ) {
    if (native_vertices[i].size() != 4*curves.size())
      native_vertices[i] = APIBuffer<Vec3fa>(scene->device,4*curves.size(),sizeof(Vec3fa),true);

    parallel_for(size_t(0), curves.size(), size_t(1024), [&] ( const range<size_t> rj ) {
      for (size_t j=rj.begin(); j<rj.end(); j++) {
        const unsigned id = curves[j];
        if (id+3 >= numVertices()) continue; // ignore invalid curves
        const Vec3fa v0 = vertices[i][id+0];
        const Vec3fa v1 = vertices[i][id+1];
        const Vec3fa v2 = vertices[i][id+2];
        const Vec3fa v3 = vertices[i][id+3];
        const InputCurve3fa icurve(v0,v1,v2,v3);
        OutputCurve3fa ocurve;
        convert<Vec3fa>(icurve,ocurve);
        native_vertices[i].store(4*j+0,ocurve.v0);
        native_vertices[i].store(4*j+1,ocurve.v1);
        native_vertices[i].store(4*j+2,ocurve.v2);
        native_vertices[i].store(4*j+3,ocurve.v3);
      }
    });
  });
  native_vertices0 = native_vertices[0];
}
static void apply( const mesh_type          & mesh ,
                   const elem_matrices_type & elem_matrices ,
                   const elem_vectors_type  & elem_vectors ,
                   const scalar_type          elem_coeff_K ,
                   const scalar_type          elem_coeff_Q )
{
  ElementComputation comp( mesh , elem_matrices , elem_vectors ,
                           elem_coeff_K , elem_coeff_Q );
  const size_t elem_count = mesh.elem_node_ids.dimension_0();
  parallel_for( elem_count , comp );
}
void test_thread_pool_recursion ()
{
    std::cout << "\nTesting thread pool recursion" << std::endl;
    static spin_mutex print_mutex;
    thread_pool *pool (default_thread_pool());
    pool->resize (2);
    parallel_for (0, 10, [&](int id, int64_t i){
        // Sleep long enough that we can push all the jobs before any get
        // done.
        Sysutil::usleep (10);
        // Then run something else that itself will push jobs onto the
        // thread pool queue.
        parallel_for (0, 10, [&](int id, int64_t i){
            Sysutil::usleep (2);
            spin_lock lock (print_mutex);
            std::cout << " recursive running thread " << id << std::endl;
        });
    });
}
std::vector<sframe> subplan_executor::run(
    const std::vector<std::shared_ptr<planner_node>>& stuff_to_run_in_parallel,
    const materialize_options& exec_params) {
  std::vector<sframe> ret(stuff_to_run_in_parallel.size());
  parallel_for(0, stuff_to_run_in_parallel.size(), [&](const size_t i) {
    ret[i] = run(stuff_to_run_in_parallel[i], exec_params);
  });
  return ret;
}
size_t lualambda_master::make_lambda(const std::string& lambda_str) {
  size_t lambda_hash = std::hash<std::string>()(lambda_str);
  std::string newstr = lambda_str;
  if (boost::starts_with(newstr, "LUA")) {
    newstr = newstr.substr(3);
  }
  parallel_for(0, num_workers(), [&](size_t i) {
    clients[i]->doString(newstr);
    clients[i]->doString("lambda" + std::to_string(lambda_hash)
                         + " = __lambda__transfer__");
  });
  return lambda_hash;
}
void FractalRenderer::performRendering(void)
{
    printf("data=%p, width=%d, height=%d, zoom=%f, resolution=%d, posx=%f, posy=%f\n",
           m_data, m_image_x, m_image_y, m_scale, m_resolution,
           m_normalizedPosition.x, m_normalizedPosition.y);
    sf::Clock timer;
    parallel_for(tbb::blocked_range2d<unsigned, unsigned>(0, m_image_x, 50, 0, m_image_y, 50),
                 MandelbrotRenderer(m_data, m_image_x, m_image_y, m_scale,
                                    m_resolution, m_normalizedPosition));
    m_texture.update(m_data);
    m_lastRenderingTime = timer.getElapsedTime();
}
void reduce_floats() {
  srand(time(NULL));
  const int numStars = 5000;
  star* array_of_structs = new star[numStars];
  stars struct_of_arrays(numStars);
  populate_star_array(array_of_structs, numStars);
  populate_stars(&struct_of_arrays);

  cout << "map pattern with arrays-of-structures" << endl;
  cout << "and structures-of-arrays" << endl;
  cout << "----------------------------------------" << endl;
  cout << "calculating longitude from right ascension of stars" << endl;

  auto get_longitude = [](const double ascension) {
    const double degrees = ascension * 15.0;
    const double adjusted_degrees = degrees > 360.0 ? degrees - 180.0 : degrees;
    return adjusted_degrees;
  };

  double output[numStars];
  const int numIterations = 10;

  // Note: clock() measures CPU time on POSIX systems (summed across all
  // threads), so these figures may overstate the wall-clock time of the
  // parallel loops.
  clock_t start = clock();
  for(int i = 0, end = numIterations; i != end; ++i) {
    parallel_for(0, numStars, 1,
                 [&struct_of_arrays, &output, get_longitude](int i) {
      output[i] = get_longitude(struct_of_arrays.ascension[i]);
    });
  }
  double seconds = (clock() - start) / (double)CLOCKS_PER_SEC;
  cout << "structure-of-arrays: " << seconds << " seconds." << endl;

  start = clock();
  for(int i = 0, end = numIterations; i != end; ++i) {
    parallel_for(0, numStars, 1,
                 [&array_of_structs, &output, get_longitude](int i) {
      output[i] = get_longitude(array_of_structs[i].ascension);
    });
  }
  seconds = (clock() - start) / (double)CLOCKS_PER_SEC;
  cout << "array-of-structures: " << seconds << " seconds." << endl;
}
void assign (size_t n, const Scalar& val) {
  /* Resize if necessary (behaviour of std::vector) */
  if (n > capacity())
    DV::resize(size_t(n * _extra_storage));
  _size = n;

  /* Assign value either on host or on device */
  if (DV::modified_host >= DV::modified_device) {
    set_functor_host f(DV::h_view, val);
    parallel_for(n, f);
    DV::t_host::device_type::fence();
    DV::modified_host++;
  } else {
    set_functor f(DV::d_view, val);
    parallel_for(n, f);
    DV::t_dev::device_type::fence();
    DV::modified_device++;
  }
}
int main(int argc, char** argv)
{
  if (argc != 2) {
    printf("Usage: %s <password hash>\n", argv[0]);
    return 1;
  }
  int notfound = 1;
  SpaceSearcher s( argv[1], &notfound );
  parallel_for( tbb::blocked_range<long>( 0, SEARCH_SPACE ), s );
  return 0;
}
/* called by the C++ code to render */
extern "C" void device_render (int* pixels,
                               const unsigned int width,
                               const unsigned int height,
                               const float time,
                               const ISPCCamera& camera)
{
  const int numTilesX = (width +TILE_SIZE_X-1)/TILE_SIZE_X;
  const int numTilesY = (height+TILE_SIZE_Y-1)/TILE_SIZE_Y;
  parallel_for(size_t(0),size_t(numTilesX*numTilesY),[&](const range<size_t>& range) {
    for (size_t i=range.begin(); i<range.end(); i++)
      renderTileTask((int)i,pixels,width,height,time,camera,numTilesX,numTilesY);
  });
}
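// Inside renderTileTask, the flat task index is typically decoded back into
// tile coordinates; a sketch assuming row-major tile order (not taken from
// the original tutorial source):
//   const int tileY = taskIndex / numTilesX;
//   const int tileX = taskIndex - tileY * numTilesX;
//   const int x0 = tileX * TILE_SIZE_X, x1 = min(x0 + TILE_SIZE_X, (int)width);
//   const int y0 = tileY * TILE_SIZE_Y, y1 = min(y0 + TILE_SIZE_Y, (int)height);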
parray<long> weights(long n, const Weight& weight) {
  const long k = 1024;
  long m = 1 + ((n - 1) / k);
  long tot;
  parray<long> rs(n + 1);
#ifdef MANUAL_CONTROL
  if (n < WEIGHTS_THRESHOLD) {
    tot = weights_seq(weight, 0, n, 0, rs.begin());
    rs[n] = tot;
    return rs;
  }
#endif
  par::cstmt(weights_contr<>::weights, [&] { return n; }, [&] {
    if (n <= k) {
      tot = weights_seq(weight, 0, n, 0, rs.begin());
    } else {
      // Pass 1: compute per-block sums of the weights.
      parray<long> sums(m);
      parallel_for(0l, m, [&] (long i) {
        long lo = i * k;
        long hi = std::min(lo + k, n);
        sums[i] = 0;
        for (long j = lo; j < hi; j++) {
          sums[i] += weight(j);
        }
      });
      // Pass 2: scan the block sums recursively, then run a sequential scan
      // within each block, seeded by that block's offset.
      parray<long> scans = rec(sums);
      parallel_for(0l, m, [&] (long i) {
        long lo = i * k;
        long hi = std::min(lo + k, n);
        weights_seq(weight, lo, hi, scans[i], rs.begin() + lo);
      });
      tot = rs[n-1] + weight(n-1);
    }
  }, [&] {
    tot = weights_seq(weight, 0, n, 0, rs.begin());
  });
  rs[n] = tot;
  return rs;
}
inline static void unpack( const array_type  & arg_output ,
                           const buffer_type & arg_input ,
                           const size_type     arg_begin ,
                           const size_type     arg_count )
{
  UnpackArray op ;
  op.output = arg_output ;
  op.input  = arg_input ;
  op.base   = arg_begin ;
  parallel_for( arg_count , op );
}
void assign_closest(std::vector<IterRange>& parvec, vuad& totals, int K,
                    leveldb::DB* work_db,
                    const std::vector<GDELTMini>& centroids,
                    int concurrency, vuai& cluster_sizes) {
  for (auto& i : cluster_sizes) i->store(0);
  for (auto& d : totals) d->store(0);
  parallel_for(concurrency, assign_closest_serial, parvec, K, work_db,
               std::cref(centroids), std::ref(totals),
               std::ref(cluster_sizes));
}
ViewRemap( const OutputView & arg_out , const InputView & arg_in )
  : output( arg_out ), input( arg_in )
  , n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
  , n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
  , n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
  , n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
  , n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
  , n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
  , n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
  , n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
{
  // Parallelize over the leading dimension; operator() copies the
  // remaining dimensions for each index.
  parallel_for( n0 , *this );
}
void cv::fastNlMeansDenoisingMulti( InputArrayOfArrays _srcImgs, OutputArray _dst,
                                    int imgToDenoiseIndex, int temporalWindowSize,
                                    float h, int templateWindowSize, int searchWindowSize)
{
    std::vector<Mat> srcImgs;
    _srcImgs.getMatVector(srcImgs);

    fastNlMeansDenoisingMultiCheckPreconditions(
        srcImgs, imgToDenoiseIndex, temporalWindowSize,
        templateWindowSize, searchWindowSize);

    _dst.create(srcImgs[0].size(), srcImgs[0].type());
    Mat dst = _dst.getMat();

    switch (srcImgs[0].type()) {
        case CV_8U:
            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
                FastNlMeansMultiDenoisingInvoker<uchar>(
                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
                    dst, templateWindowSize, searchWindowSize, h));
            break;
        case CV_8UC2:
            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
                FastNlMeansMultiDenoisingInvoker<cv::Vec2b>(
                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
                    dst, templateWindowSize, searchWindowSize, h));
            break;
        case CV_8UC3:
            parallel_for(cv::BlockedRange(0, srcImgs[0].rows),
                FastNlMeansMultiDenoisingInvoker<cv::Vec3b>(
                    srcImgs, imgToDenoiseIndex, temporalWindowSize,
                    dst, templateWindowSize, searchWindowSize, h));
            break;
        default:
            CV_Error(CV_StsBadArg,
                "Unsupported matrix format! Only uchar, Vec2b, Vec3b are supported");
    }
}
void launch_animateSphere(animateSphereFunc func,
                          int taskSize,
                          Vertex* vertices,
                          const float rcpNumTheta,
                          const float rcpNumPhi,
                          const Vec3fa& pos,
                          const float r,
                          const float f)
{
  parallel_for(size_t(0),size_t(taskSize),[&] (const range<size_t>& m) {
    for (size_t i=m.begin(); i<m.end(); i++)
      func(i,vertices,rcpNumTheta,rcpNumPhi,pos,r,f);
  });
}
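// Assumed shape of animateSphereFunc, inferred from the call site above
// rather than from the original headers; the parameter types are a guess:
typedef void (*animateSphereFunc)(size_t i,
                                  Vertex* vertices,
                                  const float rcpNumTheta,
                                  const float rcpNumPhi,
                                  const Vec3fa& pos,
                                  const float r,
                                  const float f);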