/**
 * Constructor: stores the value returned by getMaxThreads() at construction
 * time in prevnumthreads and, when compiled with OpenMP, calls
 * omp_set_num_threads(newnumthreads).
 *
 * The parameter is only named in OpenMP builds because it is not used
 * otherwise (presumably to avoid unused-parameter warnings). The default
 * argument is placed outside the preprocessor guard so that constructing
 * the object without arguments compiles both with and without OpenMP.
 *
 * @param newnumthreads number of threads passed to omp_set_num_threads();
 *        defaults to getMaxThreads(). Ignored in builds without OpenMP.
 **/
OMPNumThreadsScope(
	uint64_t const
	#if defined(_OPENMP)
	newnumthreads
	#endif
	= getMaxThreads()
)
: prevnumthreads(getMaxThreads())
{
	#if defined(_OPENMP)
	omp_set_num_threads(newnumthreads);
	#endif
}
/**
 * Throws if the value reported by getNumberOfThreads() is greater than
 * getMaxThreads() scaled by limit_percent.
 *
 * @param limit_percent factor the maximum is multiplied with; the code uses
 *        it as a plain multiplier (presumably a fraction such as 0.8 --
 *        confirm against the callers).
 * @throws KurentoException with code NOT_ENOUGH_RESOURCES when the scaled
 *         maximum is exceeded.
 */
static void
checkThreads (float limit_percent)
{
  const int currentThreads = getNumberOfThreads ();
  /* auto keeps the exact arithmetic type of the mixed product. */
  const auto allowedThreads = getMaxThreads () * limit_percent;

  if (currentThreads > allowedThreads) {
    throw KurentoException (NOT_ENOUGH_RESOURCES, "Too many threads");
  }
}
// calculates \sum_K v_i'*A*v_i double dosum(Matrix A, Matrix v) { double alpha=0.0; Matrix temp; int i, t; t = getMaxThreads(); temp = createMatrix(A->rows, t); #pragma omp parallel for schedule(static) reduction(+:alpha) for(i=0;i<v->cols;++i) { MxV(temp->col[getCurrentThread()],A,v->col[i], 1.0, 0.0, 'N'); alpha += dotproduct(temp->col[getCurrentThread()],v->col[i]); } freeMatrix(temp); return alpha; }
static void checkThreads (float limit_percent) { const rlim_t maxThreads = getMaxThreads (); if (maxThreads <= 0 || maxThreads == RLIM_INFINITY) { return; } const rlim_t maxThreadsKms = (rlim_t)(maxThreads * limit_percent); const rlim_t nThreads = (rlim_t)getNumberOfThreads (); if (nThreads > maxThreadsKms) { std::ostringstream oss; oss << "Reached KMS threads limit: " << maxThreadsKms; std::string exMessage = oss.str(); oss << " (system max: " << maxThreads << ");" << " set a higher limit with `ulimit -Su`, or in the KMS service settings (/etc/default/kurento-media-server)"; std::string logMessage = oss.str(); GST_WARNING ("%s", logMessage.c_str()); throw KurentoException (NOT_ENOUGH_RESOURCES, exMessage); } }
/**
 * Level-by-level bit sorting of the compact array C using OpenMP loops.
 *
 * Parameters:
 *  - C          compact array that is read through C->get() and rewritten
 *               in place through C->D
 *  - verbose    if true, progress markers are printed on std::cerr
 *  - numthreads value used in the num_threads() clauses of the parallel
 *               loops; the parameter is only named when _OPENMP is defined
 *
 * Overview of the visible code:
 *  - B holds one bit per element of C (C->n rounded up to a multiple of
 *    64, stored as 64 bit words).
 *  - Q is a queue of half open intervals [l,r), initially [0,C->n).
 *  - The outer loop runs over the bit positions ib = C->getB()-1 ... 0,
 *    with mask = 1 << ib and sb = C->getB()-ib-1. S is a
 *    CompactSparseArray constructed over C->D for the current level.
 *  - For every interval taken from Q:
 *     1. the interval is cut into getMaxThreads()*2 packets; for each
 *        packet the bit selected by mask is extracted from every element,
 *        the one bits are counted and the bit is stored in B. The
 *        non 64-aligned head and tail of a packet are written with putBit
 *        while holding the lock "block" (presumably because neighbouring
 *        packets may share a word of B); the aligned middle part is
 *        written word by word without the lock.
 *     2. aones/azeroes are turned into exclusive prefix sums.
 *     3. per packet, two temporary CompactArray objects (ACZ for elements
 *        with the bit clear, ACO for elements with the bit set) are
 *        allocated and filled from S, keeping the order inside the packet.
 *     4. the temporaries are copied back into S: all zero-bit elements
 *        starting at l, followed by all one-bit elements. Copy-back
 *        packets are arranged into batches; a packet for which
 *        CopyBackPacket::overlap() reports an overlap with the last packet
 *        of the current batch is deferred to a later batch. Each batch is
 *        processed by one parallel loop.
 *     5. the zero part [l,l+zeros) and the one part [r-ones,r) are queued
 *        in Q2 if they are not empty.
 *  - After all intervals of a level are done, the bits collected in B are
 *    written into C->D at bit position i*C->getB()+sb, using the same
 *    batching scheme, and Q is replaced by Q2.
 *
 * NOTE(review): the packet count is derived from getMaxThreads() while the
 * loops run with num_threads(numthreads); this looks intentional (the
 * packet count only fixes the partitioning) -- confirm.
 * NOTE(review): the text visible here ends after the per-level loop; the
 * remainder of the function (including its return statement) is not shown.
 **/
::libmaus2::autoarray::AutoArray<uint64_t> toWaveletTreeBitsParallel( ::libmaus2::bitio::CompactArray * C, bool const verbose, uint64_t const #if defined(_OPENMP) numthreads #endif ) { uint64_t const pn = ((C->n + 63) / 64)*64; ::libmaus2::autoarray::AutoArray<uint64_t> B( pn/64 , false ); ::libmaus2::parallel::OMPLock block; typedef std::pair<uint64_t, uint64_t> qtype; std::deque < qtype > Q; Q.push_back( qtype(0,C->n) ); if ( verbose ) std::cerr << "(Sorting bits..."; for ( int ib = (C->getB())-1; ib>=0; --ib ) { std::deque < qtype > Q2; uint64_t const sb = (C->getB()-ib-1); uint64_t const mask = (1ull << ib); if ( verbose ) std::cerr << "(l=" << ib << ")"; ::libmaus2::bitio::CompactSparseArray S(C->D,C->n, C->getB() - sb , sb , C->getB()); while ( Q.size() ) { uint64_t l = Q.front().first, r = Q.front().second; Q.pop_front(); // std::cerr << "[" << l << "," << r << "]" << std::endl; uint64_t const numpackets = getMaxThreads() * 2; ::libmaus2::autoarray::AutoArray < uint64_t > aones(numpackets+1); ::libmaus2::autoarray::AutoArray < uint64_t > azeroes(numpackets+1); uint64_t const intervalsize = r-l; uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets; if ( verbose ) std::cerr << "(c01/b"; #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t ones = 0; uint64_t low = std::min ( l + h * packetsize, r ); uint64_t const rlow = low; uint64_t const high = std::min ( low + packetsize, r ); uint64_t const low64 = std::min ( ((low+63)/64)*64, high ); uint64_t const high64 = high & (~(63ull)); // std::cerr << "low=" << low << " low64=" << low64 << std::endl; /** * align low to 64 **/ block.lock(); for ( ; low != low64 ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; ::libmaus2::bitio::putBit(B.get(), low, v); } block.unlock(); /** * handle full blocks of 64 values **/ if ( low != high ) { assert ( low % 64 == 0 ); 
assert ( high64 >= low ); uint64_t * Bptr = B.get() + (low/64); while ( low != high64 ) { uint64_t vb = 0; uint64_t const lh = low+64; for ( ; low != lh ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; vb <<= 1; vb |= v; } (*Bptr++) = vb; } } /** * handle rest **/ block.lock(); for ( ; (low != high) ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; ::libmaus2::bitio::putBit(B.get(), low, v); } block.unlock(); uint64_t const zeroes = (high-rlow)-ones; aones [ h ] = ones; azeroes [ h ] = zeroes; } if ( verbose ) std::cerr << ")"; /** * compute prefix sums for zeroes and ones **/ { uint64_t c = 0; for ( uint64_t i = 0; i < numpackets + 1; ++i ) { uint64_t const t = aones[i]; aones[i] = c; c += t; } } { uint64_t c = 0; for ( uint64_t i = 0; i < numpackets + 1; ++i ) { uint64_t const t = azeroes[i]; azeroes[i] = c; c += t; } } uint64_t const ones = aones[numpackets]; uint64_t const zeros = (r-l)-ones; ::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACZ(numpackets); ::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACO(numpackets); if ( verbose ) std::cerr << "(a"; for ( uint64_t h = 0; h < numpackets; ++h ) { ::libmaus2::bitio::CompactArray::unique_ptr_type tACZ( new ::libmaus2::bitio::CompactArray( azeroes [ h+1 ] - azeroes[ h ], C->getB() - sb ) ); ACZ[h] = UNIQUE_PTR_MOVE(tACZ); ::libmaus2::bitio::CompactArray::unique_ptr_type tACO( new ::libmaus2::bitio::CompactArray( aones [ h+1 ] - aones[ h ], C->getB() - sb ) ); ACO[h] = UNIQUE_PTR_MOVE(tACO); } if ( verbose ) std::cerr << ")"; if ( verbose ) std::cerr << "(d"; #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = std::min ( l + h * packetsize, r ); uint64_t const high = std::min ( low + packetsize, r ); uint64_t zp = 0; uint64_t op = 0; ::libmaus2::bitio::CompactArray & CO = 
*ACO[h]; ::libmaus2::bitio::CompactArray & CZ = *ACZ[h]; for ( uint64_t i = low; i != high; ++i ) { uint64_t const v = S.get(i); if ( v & mask ) CO.set ( op++, v); else CZ.set ( zp++, v); } assert ( zp == azeroes[h+1]-azeroes[h] ); assert ( op == aones[h+1]-aones[h] ); } if ( verbose ) std::cerr << ")"; std::vector < CopyBackPacket > zpacketstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = l + azeroes[h]; uint64_t const high = low + (azeroes[h+1]-azeroes[h]); if ( high-low ) zpacketstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < CopyBackPacket > opacketstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = l + azeroes[numpackets ] + aones[h]; uint64_t const high = low + (aones[h+1]-aones[h]); if ( high-low ) opacketstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < std::vector < CopyBackPacket > > zpackets; while ( zpacketstodo.size() ) { std::vector < CopyBackPacket > zpacketsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(zpacketstodo.front()); for ( uint64_t i = 1; i < zpacketstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), zpacketstodo[i], C->getB()) ) zpacketsnewtodo.push_back(zpacketstodo[i]); else nlist.push_back(zpacketstodo[i]); zpackets.push_back(nlist); zpacketstodo = zpacketsnewtodo; } std::vector < std::vector < CopyBackPacket > > opackets; while ( opacketstodo.size() ) { std::vector < CopyBackPacket > opacketsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(opacketstodo.front()); for ( uint64_t i = 1; i < opacketstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), opacketstodo[i], C->getB()) ) opacketsnewtodo.push_back(opacketstodo[i]); else nlist.push_back(opacketstodo[i]); opackets.push_back(nlist); opacketstodo = opacketsnewtodo; } // std::cerr << "zpackets: " << zpackets.size() << " opackets: " << opackets.size() << std::endl; if ( verbose ) std::cerr << "(cb"; for ( uint64_t q = 0; 
q < zpackets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t j = 0; j < static_cast<int64_t>(zpackets[q].size()); ++j ) { CopyBackPacket const CBP = zpackets[q][j]; uint64_t ac = CBP.low; ::libmaus2::bitio::CompactArray & CZ = *ACZ[CBP.h]; for ( uint64_t zc = 0 ; zc != CBP.high-CBP.low; ++zc ) S.set ( ac++ , CZ.get(zc) ); } for ( uint64_t q = 0; q < opackets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t j = 0; j < static_cast<int64_t>(opackets[q].size()); ++j ) { CopyBackPacket const CBP = opackets[q][j]; uint64_t ac = CBP.low; ::libmaus2::bitio::CompactArray & CO = *ACO[CBP.h]; for ( uint64_t oc = 0 ; oc != CBP.high-CBP.low; ++oc ) S.set ( ac++ , CO.get(oc) ); } if ( verbose ) std::cerr << ")"; if ( zeros ) Q2.push_back ( qtype(l,l+zeros) ); if ( ones ) Q2.push_back ( qtype(r-ones,r) ); } // std::cerr << std::endl; uint64_t const numpackets = getMaxThreads() * 2; uint64_t const intervalsize = C->n; uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets; std::vector < CopyBackPacket > packetstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = std::min(h*packetsize,C->n); uint64_t const high = std::min(low+packetsize,C->n); if ( high-low ) packetstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < std::vector < CopyBackPacket > > packets; while ( packetstodo.size() ) { std::vector < CopyBackPacket > packetsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(packetstodo.front()); for ( uint64_t i = 1; i < packetstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), packetstodo[i], C->getB()) ) packetsnewtodo.push_back(packetstodo[i]); else nlist.push_back(packetstodo[i]); packets.push_back(nlist); packetstodo = packetsnewtodo; } for ( uint64_t q = 0; q < packets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for 
schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(packets[q].size()); ++h ) { CopyBackPacket const CBP = packets[q][h]; for ( uint64_t i = CBP.low; i < CBP.high; ++i ) ::libmaus2::bitio::putBit ( C->D , i*C->getB() + sb , ::libmaus2::bitio::getBit(B.get(), i) ); } Q = Q2; }
/**
 * Sets TID to the next value handed out by next.next().
 *
 * In builds where assert() is active, the new TID is checked to be below
 * getMaxThreads().
 * NOTE(review): TID is presumably a per-thread id -- its declaration is
 * not visible here.
 */
void Galois::Runtime::LL::initTID()
{
  const auto freshId = next.next();
  TID = freshId;

  assert(TID < getMaxThreads());
}