void ROITracker::updateDelay( const PixelViewports& pvps, const uint8_t* ticket ) { LBASSERT( _needsUpdate ); LBASSERTINFO( ticket == _ticket, "Wrong ticket" ); if( ticket != _ticket ) { LBERROR << "Wrong ticket" << std::endl; return; } uint32_t totalAreaFound = 0; for( uint32_t i = 0; i < pvps.size(); i++ ) totalAreaFound += pvps[ i ].getArea(); Area& area = (*_curFrame)[ _lastStage ].areas.back(); if( totalAreaFound < area.pvp.getArea()*4/5 ) { // ROI cutted enough, reset failure statistics area.lastSkip = 0; }else { // disable ROI for next frames, if it was failing before, // increase number of frames to skip area.lastSkip = LB_MIN( area.lastSkip*2 + 1, 64 ); area.skip = area.lastSkip; } _needsUpdate = false; }
//---------------------------------------------------------------------- // read //---------------------------------------------------------------------- void SocketConnection::readNB( void* buffer, const uint64_t bytes ) { if( isClosed() ) return; WSABUF wsaBuffer = { LB_MIN( bytes, 65535 ), reinterpret_cast< char* >( buffer ) }; DWORD flags = 0; ResetEvent( _overlappedRead.hEvent ); _overlappedDone = 0; const int result = WSARecv( _readFD, &wsaBuffer, 1, &_overlappedDone, &flags, &_overlappedRead, 0 ); if( result == 0 ) // got data already { if( _overlappedDone == 0 ) // socket closed { LBDEBUG << "Got EOF, closing connection" << std::endl; close(); } SetEvent( _overlappedRead.hEvent ); } else if( GetLastError() != WSA_IO_PENDING ) { LBWARN << "Could not start overlapped receive: " << lunchbox::sysError << ", closing connection" << std::endl; close(); } }
void FramerateEqualizer::_init() { const Compound* compound = getCompound(); if( _nSamples > 0 || !compound ) return; _nSamples = 1; // Subscribe to child channel load events const Compounds& children = compound->getChildren(); LBASSERT( _loadListeners.empty( )); _loadListeners.resize( children.size( )); for( size_t i = 0; i < children.size(); ++i ) { Compound* child = children[i]; const uint32_t period = child->getInheritPeriod(); LoadListener& loadListener = _loadListeners[i]; loadListener.parent = this; loadListener.period = period; LoadSubscriber subscriber( &loadListener ); child->accept( subscriber ); _nSamples = LB_MAX( _nSamples, period ); } _nSamples = LB_MIN( _nSamples, 100 ); }
/**
 * Change the non-idle image quality by delta.
 *
 * The resulting quality is clamped to the range [0.1, 1.0], the object is
 * marked dirty with DIRTY_FLAGS and the new value is logged.
 *
 * @param delta the amount added to the current quality.
 */
void FrameData::adjustQuality( const float delta )
{
    _quality += delta;

    // clamp: lower bound first, then upper bound
    _quality = LB_MIN( LB_MAX( _quality, 0.1f ), 1.0f );

    setDirty( DIRTY_FLAGS );
    LBINFO << "Set non-idle image quality to " << _quality << std::endl;
}
/**
 * Update the time of every tree node whose compound renders on channel.
 *
 * The tree is walked children-first. For a matching node, the statistics
 * belonging to the compound's task are scanned: clear, draw and readback
 * events define the render interval, asynchronous readback and frame
 * transmit durations are summed up, and the first assemble event ends the
 * scan. The node time becomes the largest of the render interval, 1 and the
 * summed transmit time. Nodes without any render event are left untouched.
 *
 * @param node        the subtree to process, may be 0.
 * @param channel     the channel which produced the statistics.
 * @param nStatistics the number of entries in statistics.
 * @param statistics  the statistic entries.
 */
void TreeEqualizer::_notifyLoadData( Node* node, Channel* channel,
                                     const uint32_t nStatistics,
                                     const Statistic* statistics )
{
    if( !node )
        return;

    _notifyLoadData( node->left, channel, nStatistics, statistics );
    _notifyLoadData( node->right, channel, nStatistics, statistics );

    if( !node->compound || node->compound->getChannel() != channel )
        return;

    // gather relevant load data
    const uint32_t taskID = node->compound->getTaskID();
    int64_t firstStart = std::numeric_limits< int64_t >::max();
    int64_t lastEnd = 0;
    int64_t transmit = 0;

    for( uint32_t n = 0; n < nStatistics; ++n )
    {
        const Statistic& entry = statistics[ n ];
        if( entry.task != taskID ) // from different compound
            continue;

        // assemble blocks on input frames, stop using subsequent data
        if( entry.type == Statistic::CHANNEL_ASSEMBLE )
            break;

        if( entry.type == Statistic::CHANNEL_CLEAR ||
            entry.type == Statistic::CHANNEL_DRAW ||
            entry.type == Statistic::CHANNEL_READBACK )
        {
            firstStart = LB_MIN( firstStart, entry.startTime );
            lastEnd = LB_MAX( lastEnd, entry.endTime );
        }
        else if( entry.type == Statistic::CHANNEL_ASYNC_READBACK ||
                 entry.type == Statistic::CHANNEL_FRAME_TRANSMIT )
        {
            transmit += entry.endTime - entry.startTime;
        }
    }

    // no render event seen for this task
    if( firstStart == std::numeric_limits< int64_t >::max( ))
        return;

    node->time = lastEnd - firstStart;
    node->time = LB_MAX( node->time, 1 );
    node->time = LB_MAX( node->time, transmit );
}
/**
 * Compute the extent covered by all items.
 *
 * @return a uint128_t constructed from (largest item end, smallest item
 *         start). For an empty item list this is (0, max uint64_t).
 */
uint128_t computeMinMax() const
{
    uint64_t smallestStart = std::numeric_limits<uint64_t>::max();
    uint64_t largestEnd = 0;

    for (ItemsCIter it = items.begin(); it != items.end(); ++it)
    {
        smallestStart = LB_MIN(smallestStart, it->start);
        largestEnd = LB_MAX(largestEnd, it->end);
    }

    return uint128_t(largestEnd, smallestStart);
}
int64_t NamedPipeConnection::write( const void* buffer, const uint64_t bytes ) { if( !isConnected() || _fd == INVALID_HANDLE_VALUE ) return -1; DWORD wrote; const DWORD use = LB_MIN( bytes, CO_WRITE_BUFFER_SIZE ); ResetEvent( _write.hEvent ); if( WriteFile( _fd, buffer, use, &wrote, &_write )) return wrote; if( GetLastError() != ERROR_IO_PENDING ) { LBWARN << "Could not start write: " << lunchbox::sysError << std::endl; return -1; } DWORD got = 0; if( GetOverlappedResult( _fd, &_write, &got, false )) return got; switch( GetLastError( )) { case ERROR_PIPE_CONNECTED: return 0; case ERROR_IO_PENDING: case ERROR_IO_INCOMPLETE: { if( WAIT_OBJECT_0 != WaitForSingleObject( _write.hEvent, INFINITE )) throw Exception( Exception::TIMEOUT_WRITE ); break; } default: LBWARN << "Write complete failed: " << lunchbox::sysError << std::endl; } if( GetOverlappedResult( _fd, &_write, &got, false )) return got; if( GetLastError() == ERROR_PIPE_CONNECTED ) return 0; LBWARN << "Write complete failed: " << lunchbox::sysError << std::endl; return -1; }
/**
 * Set the near and far planes so that they enclose the given bounding sphere.
 *
 * The nearest and farthest points of the sphere along the viewing direction
 * are transformed by the head transform combined with the camera rotation.
 * For orthographic rendering their depths are used directly. Otherwise the
 * near plane is limited from below by a minimum derived from the frustum
 * size, and the far plane is at least twice the near plane.
 *
 * @param boundingSphere the model's bounding sphere; w() is used as radius.
 */
void Channel::_updateNearFar( const mesh::BoundingSphere& boundingSphere )
{
    // compute dynamic near/far plane of whole model
    const FrameData& frameData = _getFrameData();
    const eq::Matrix4f& cameraRotation = frameData.getCameraRotation();
    const eq::Matrix4f modelView = getHeadTransform() * cameraRotation;

    eq::Matrix4f modelViewInv;
    compute_inverse( modelView, modelViewInv );

    // viewing direction in model space, scaled to the sphere radius
    const eq::Vector3f origin = modelViewInv * eq::Vector3f::ZERO;
    eq::Vector3f viewDir = modelViewInv * eq::Vector3f( 0.0f, 0.0f, -1.0f );
    viewDir -= origin;
    viewDir.normalize();
    viewDir *= boundingSphere.w();

    const eq::Vector3f sphereCenter =
        frameData.getCameraPosition().get_sub_vector< 3 >() -
        boundingSphere.get_sub_vector< 3 >();
    const eq::Vector3f closest = modelView * ( sphereCenter - viewDir );
    const eq::Vector3f farthest = modelView * ( sphereCenter + viewDir );

    if( useOrtho( ))
    {
        LBASSERTINFO( fabs( farthest.z() - closest.z() ) >
                      std::numeric_limits< float >::epsilon(),
                      closest << " == " << farthest );
        setNearFar( -closest.z(), -farthest.z() );
        return;
    }

    // estimate minimal value of near plane based on frustum size
    const eq::Frustumf& frustum = getFrustum();
    const float frustumW = fabs( frustum.right() - frustum.left() );
    const float frustumH = fabs( frustum.top() - frustum.bottom() );
    const float frustumSize = LB_MIN( frustumW, frustumH );
    const float nearLimit = frustum.near_plane() / frustumSize * .001f;

    const float nearPlane = LB_MAX( nearLimit, -closest.z() );
    const float farPlane = LB_MAX( nearPlane * 2.f, -farthest.z() );
    setNearFar( nearPlane, farPlane );
}
template< class T > void _test() { T* lock = new T; lock->set(); #ifdef LUNCHBOX_USE_OPENMP const size_t nThreads = LB_MIN( lunchbox::OMP::getNThreads() * 3, MAXTHREADS ); #else const size_t nThreads = 16; #endif Thread< T > threads[MAXTHREADS]; for( size_t i = 1; i <= nThreads; i = i << 1 ) { _running = true; for( size_t j = 0; j < i; ++j ) { threads[j].lock = lock; TEST( threads[j].start( )); } lunchbox::sleep( 10 ); // let threads initialize _clock.reset(); lock->unset(); lunchbox::sleep( TIME ); // let threads run _running = false; for( size_t j = 0; j < i; ++j ) TEST( threads[j].join( )); const float time = _clock.getTimef(); TEST( !lock->isSet( )); lock->set(); size_t ops = 0; for( size_t j = 0; j < nThreads; ++j ) ops += threads[j].ops; std::cout << std::setw(20) << lunchbox::className( lock ) << ", " << std::setw(12) << /*set, test, unset*/ 3 * ops / time << ", " << std::setw(3) << i << std::endl; } delete lock; }
/**
 * Record the render time of one channel for a frame.
 *
 * The interval spanned by all clear, draw, assemble and readback statistics
 * is divided by the listener's period and stored in the parent's frame time
 * entries for frameNumber, keeping the larger of the old and new value.
 * Nothing happens when no such statistic is present.
 *
 * @param channel     the channel which produced the statistics.
 * @param frameNumber the frame the statistics belong to.
 * @param statistics  the statistic entries of the channel.
 * @param region      unused by this implementation.
 */
void FramerateEqualizer::LoadListener::notifyLoadData(
    Channel* channel, const uint32_t frameNumber, const Statistics& statistics,
    const Viewport& region )
{
    // gather required load data
    const int64_t noStart = std::numeric_limits< int64_t >::max();
    int64_t firstStart = noStart;
    int64_t lastEnd = 0;

    for( size_t n = 0; n < statistics.size(); ++n )
    {
        const eq::Statistic& entry = statistics[n];
        const bool isRenderEvent =
            entry.type == eq::Statistic::CHANNEL_CLEAR ||
            entry.type == eq::Statistic::CHANNEL_DRAW ||
            entry.type == eq::Statistic::CHANNEL_ASSEMBLE ||
            entry.type == eq::Statistic::CHANNEL_READBACK;
        if( !isRenderEvent )
            continue;

        firstStart = LB_MIN( firstStart, entry.startTime );
        lastEnd = LB_MAX( lastEnd, entry.endTime );
    }

    if( firstStart == noStart )
        return;

    if( firstStart == lastEnd ) // very fast draws might report 0 times
        ++lastEnd;

    typedef std::deque< FrameTime >::iterator FrameTimeIter;
    for( FrameTimeIter it = parent->_times.begin();
         it != parent->_times.end(); ++it )
    {
        if( it->first == frameNumber )
        {
            const float time =
                static_cast< float >( lastEnd - firstStart ) / period;
            it->second = LB_MAX( it->second, time );
            LBLOG( LOG_LB2 ) << "Frame " << frameNumber << " channel "
                             << channel->getName() << " time " << time
                             << " period " << period << std::endl;
        }
    }
}
/**
 * Refresh the resource and constraint data of a leaf node.
 *
 * The node receives the usage of its compound (0 if inactive), the size of
 * the channel's pixel viewport as maximum size, and the equalizer's boundary
 * and resistance settings. For a compound with a destination channel the
 * resources are then reduced to account for assembly work: they are set to 0
 * when the other resources reach the assemble-only limit, otherwise they are
 * lowered by the share of time spent assembling.
 *
 * @param node the leaf node to update; its compound must have a channel.
 */
void LoadEqualizer::_updateLeaf( Node* node )
{
    const Compound* compound = node->compound;
    const Channel* channel = compound->getChannel();
    LBASSERT( channel );
    const PixelViewport& channelPVP = channel->getPixelViewport();

    if( compound->isActive( ))
        node->resources = compound->getUsage();
    else
        node->resources = 0.f;

    LBLOG( LOG_LB2 ) << channel->getName() << " active "
                     << compound->isActive() << " using " << node->resources
                     << std::endl;
    LBASSERT( node->resources >= 0.f );

    node->maxSize.x() = channelPVP.w;
    node->maxSize.y() = channelPVP.h;
    node->boundaryf = getBoundaryf();
    node->boundary2i = getBoundary2i();
    node->resistancef = getResistancef();
    node->resistance2i = getResistance2i();

    // only destination channels pay for assembly
    if( !compound->hasDestinationChannel( ))
        return;

    const float totalResources = _getTotalResources();
    if( getAssembleOnlyLimit() <= totalResources - node->resources )
    {
        node->resources = 0.f;
        return; // OPT
    }

    const float totalTime = float( _getTotalTime( ));
    const float assembleTime = float( _getAssembleTime( ));
    if( assembleTime == 0.f || node->resources == 0.f )
        return;

    const float timePerResource =
        totalTime / ( totalResources - node->resources );
    const float renderTime = timePerResource * node->resources;

    // never subtract more than the node would spend rendering
    const float usedAssembleTime = LB_MIN( assembleTime, renderTime );
    const float adjustedTimePerResource =
        (totalTime + usedAssembleTime) / totalResources;
    node->resources -= ( usedAssembleTime / adjustedTimePerResource );

    if( node->resources < 0.f ) // may happen due to fp rounding
        node->resources = 0.f;
}
//---------------------------------------------------------------------- // read //---------------------------------------------------------------------- void NamedPipeConnection::readNB( void* buffer, const uint64_t bytes ) { if( isClosed( )) return; ResetEvent( _read.hEvent ); DWORD use = LB_MIN( bytes, CO_READ_BUFFER_SIZE ); if( ReadFile( _fd, buffer, use, &_readDone, &_read ) ) { LBASSERT( _readDone > 0 ); SetEvent( _read.hEvent ); } else if( GetLastError() != ERROR_IO_PENDING ) { LBWARN << "Could not start overlapped receive: " << lunchbox::sysError << ", closing connection" << std::endl; close(); } }
void Renderer::updateNearFar( const Vector4f& boundingSphere ) { const Matrix4f& view = getViewMatrix(); Matrix4f viewInv; compute_inverse( view, viewInv ); const Vector3f& zero = viewInv * Vector3f::ZERO; Vector3f front = viewInv * Vector3f( 0.0f, 0.0f, -1.0f ); front -= zero; front.normalize(); front *= boundingSphere.w(); const Vector3f& translation = getModelMatrix().get_translation(); const Vector3f& center = translation - boundingSphere.get_sub_vector< 3 >(); const Vector3f& nearPoint = view * ( center - front ); const Vector3f& farPoint = view * ( center + front ); if( _impl->useOrtho( )) { LBASSERTINFO( fabs( farPoint.z() - nearPoint.z() ) > std::numeric_limits< float >::epsilon(), nearPoint << " == " << farPoint ); setNearFar( -nearPoint.z(), -farPoint.z() ); } else { // estimate minimal value of near plane based on frustum size const eq::Frustumf& frustum = _impl->getFrustum(); const float width = fabs( frustum.right() - frustum.left() ); const float height = fabs( frustum.top() - frustum.bottom() ); const float size = LB_MIN( width, height ); const float minNear = frustum.near_plane() / size * .001f; const float zNear = LB_MAX( minNear, -nearPoint.z() ); const float zFar = LB_MAX( zNear * 2.f, -farPoint.z() ); setNearFar( zNear, zFar ); } }
/**
 * Read back the per-block ROI information from the frame buffer.
 *
 * The frame buffer region (the block viewport _pvp zoomed by GRID_SIZE and
 * clipped to _pvpOriginal) is copied into a rectangle texture, drawn as a
 * quad of _pvp.w x _pvp.h pixels into a GL_RGBA32F frame buffer object using
 * the ROI fragment shader, and the resulting color texture is downloaded
 * into _perBlockInfo.
 *
 * The shader program is created, compiled and linked on first use. When
 * linking fails, a warning is logged, the texture target is disabled and the
 * FBO is unbound again before returning, and _perBlockInfo is left unchanged.
 *
 * @param glObjects the object manager providing textures, FBOs, shaders
 *                  and programs.
 */
void ROIFinder::_readbackInfo( util::ObjectManager& glObjects )
{
    LBASSERT( glObjects.supportsEqTexture( ));
    LBASSERT( glObjects.supportsEqFrameBufferObject( ));

    PixelViewport pvp = _pvp;
    pvp.apply( Zoom( GRID_SIZE, GRID_SIZE ));
    // clip the zoomed viewport to the original one
    pvp.w = LB_MIN( pvp.w+pvp.x, _pvpOriginal.w+_pvpOriginal.x ) - pvp.x;
    pvp.h = LB_MIN( pvp.h+pvp.y, _pvpOriginal.h+_pvpOriginal.y ) - pvp.y;

    LBASSERT( pvp.isValid());

    // copy frame buffer to texture
    const void* bufferKey = _getInfoKey( );
    util::Texture* texture =
        glObjects.obtainEqTexture( bufferKey, GL_TEXTURE_RECTANGLE_ARB );

#ifdef EQ_ROI_USE_DEPTH_TEXTURE
    texture->copyFromFrameBuffer( GL_DEPTH_COMPONENT, pvp );
#else
    texture->copyFromFrameBuffer( GL_RGBA, pvp );
#endif

    // draw zoomed quad into FBO
    const void* fboKey = _getInfoKey( );
    util::FrameBufferObject* fbo = glObjects.getEqFrameBufferObject( fboKey );

    if( fbo )
    {
        LBCHECK( fbo->resize( _pvp.w, _pvp.h ));
    }
    else
    {
        fbo = glObjects.newEqFrameBufferObject( fboKey );
        LBCHECK( fbo->init( _pvp.w, _pvp.h, GL_RGBA32F, 0, 0 ));
    }
    fbo->bind();

    texture->bind();

    // Enable & download depth texture
    glEnable( GL_TEXTURE_RECTANGLE_ARB );

    texture->applyWrap();
    texture->applyZoomFilter( FILTER_LINEAR );

    // Enable shaders
    GLuint program = glObjects.getProgram( shaderRBInfo );

    if( program == util::ObjectManager::INVALID )
    {
        // Create fragment shader which reads depth values from
        // rectangular textures
        const GLuint shader = glObjects.newShader( shaderRBInfo,
                                                   GL_FRAGMENT_SHADER );
        LBASSERT( shader != util::ObjectManager::INVALID );

#ifdef EQ_ROI_USE_DEPTH_TEXTURE
        const GLchar* fShaderPtr = roiFragmentShader_glsl.c_str();
#else
        const GLchar* fShaderPtr = roiFragmentShaderRGB_glsl.c_str();
#endif
        EQ_GL_CALL( glShaderSource( shader, 1, &fShaderPtr, 0 ));
        EQ_GL_CALL( glCompileShader( shader ));

        // initialised so a failing query cannot leave it indeterminate
        GLint status = GL_FALSE;
        glGetShaderiv( shader, GL_COMPILE_STATUS, &status );
        if( !status )
            LBERROR << "Failed to compile fragment shader for ROI finder"
                    << std::endl;

        program = glObjects.newProgram( shaderRBInfo );

        EQ_GL_CALL( glAttachShader( program, shader ));
        EQ_GL_CALL( glLinkProgram( program ));

        status = GL_FALSE;
        glGetProgramiv( program, GL_LINK_STATUS, &status );
        if( !status )
        {
            LBWARN << "Failed to link shader program for ROI finder"
                   << std::endl;
            // undo the state changes made above; no program is in use yet
            glDisable( GL_TEXTURE_RECTANGLE_ARB );
            fbo->unbind();
            return;
        }

        // use fragment shader and setup uniforms
        EQ_GL_CALL( glUseProgram( program ));

        GLint param = glGetUniformLocation( program, "texture" );
        glUniform1i( param, 0 );
    }
    else
    {
        // use fragment shader
        EQ_GL_CALL( glUseProgram( program ));
    }

    // Draw Quad
    glDisable( GL_LIGHTING );
    glColor3f( 1.0f, 1.0f, 1.0f );

    glBegin( GL_QUADS );
        glVertex3i( 0, 0, 0 );
        glVertex3i( _pvp.w, 0, 0 );
        glVertex3i( _pvp.w, _pvp.h, 0 );
        glVertex3i( 0, _pvp.h, 0 );
    glEnd();

    // restore state
    glDisable( GL_TEXTURE_RECTANGLE_ARB );
    EQ_GL_CALL( glUseProgram( 0 ));

    fbo->unbind();

    // finish readback of info
    LBASSERT( static_cast<int32_t>(_perBlockInfo.size()) >= _pvp.w*_pvp.h*4 );

    texture = fbo->getColorTextures()[0];
    LBASSERT( texture->getFormat() == GL_RGBA );
    LBASSERT( texture->getType() == GL_FLOAT );
    texture->download( &_perBlockInfo[0] );
}
template< class T, uint32_t hold > void _test() { T* lock = new T; lock->set(); #ifdef LUNCHBOX_USE_OPENMP const size_t nThreads = LB_MIN( lunchbox::OMP::getNThreads()*3, MAXTHREADS ); #else const size_t nThreads = 16; #endif WriteThread< T, hold > writers[MAXTHREADS]; ReadThread< T, hold > readers[MAXTHREADS]; std::cout << " Class, write ops/ms, read ops/ms, w threads, " << "r threads" << std::endl; for( size_t nWrite = 0; nWrite <= nThreads; nWrite = (nWrite == 0) ? 1 : nWrite << 1 ) { for( size_t i = 1; i <= nThreads; i = i << 1 ) { if( i < nWrite ) continue; const size_t nRead = i - nWrite; _running = true; for( size_t j = 0; j < nWrite; ++j ) { writers[j].lock = lock; TEST( writers[j].start( )); } for( size_t j = 0; j < nRead; ++j ) { readers[j].lock = lock; TESTINFO( readers[j].start(), j ); } lunchbox::sleep( 10 ); // let threads initialize _clock.reset(); lock->unset(); lunchbox::sleep( TIME ); // let threads run _running = false; for( size_t j = 0; j < nWrite; ++j ) TEST( writers[j].join( )); for( size_t j = 0; j < nRead; ++j ) TEST( readers[j].join( )); const double time = _clock.getTimed(); TEST( !lock->isSet( )); lock->set(); size_t nWriteOps = 0; double wTime = time * double( nWrite ); for( size_t j = 0; j < nWrite; ++j ) { nWriteOps += writers[j].ops; wTime -= writers[j].sTime; } if( nWrite > 0 ) wTime /= double( nWrite ); if( wTime == 0.f ) wTime = std::numeric_limits< double >::epsilon(); size_t nReadOps = 0; double rTime = time * double( nRead ); for( size_t j = 0; j < nRead; ++j ) { nReadOps += readers[j].ops; rTime -= readers[j].sTime; } if( nRead > 0 ) rTime /= double( nRead ); if( rTime == 0.f ) rTime = std::numeric_limits< double >::epsilon(); std::cout << std::setw(20)<< lunchbox::className( lock ) << ", " << std::setw(12) << 3 * nWriteOps / wTime << ", " << std::setw(12) << 3 * nReadOps / rTime << ", " << std::setw(9) << nWrite << ", " << std::setw(9) << nRead << std::endl; } } delete lock; }
/**
 * Add the load reported by one channel to the load entry of a frame.
 *
 * Only statistics of the task registered for the channel are used. Clear,
 * draw and readback events define the render interval. Asynchronous readback
 * and frame transmit durations are added to the transmit time, and the time
 * spent waiting for the send token is subtracted from it. The first assemble
 * event ends the scan. The larger of render interval and transmit time is
 * added to the load; once no more channels are missing, the accumulated time
 * is normalized by the number of resources.
 *
 * @param channel     the channel which produced the statistics.
 * @param frameNumber the frame the statistics belong to.
 * @param statistics  the statistic entries of the channel.
 */
void ViewEqualizer::Listener::notifyLoadData(Channel* channel,
                                             const uint32_t frameNumber,
                                             const Statistics& statistics,
                                             const Viewport& /*region*/)
{
    Load& load = _getLoad(frameNumber);
    if (load == Load::NONE)
        return;

    LBASSERT(_taskIDs.find(channel) != _taskIDs.end());
    const uint32_t taskID = _taskIDs[channel];

    // gather relevant load data
    int64_t startTime = std::numeric_limits<int64_t>::max();
    int64_t endTime = 0;
    bool loadSet = false;
    int64_t transmitTime = 0;
    for (size_t i = 0; i < statistics.size() && !loadSet; ++i)
    {
        const Statistic& data = statistics[i];
        if (data.task != taskID) // data from another compound
            continue;

        switch (data.type)
        {
        case Statistic::CHANNEL_CLEAR:
        case Statistic::CHANNEL_DRAW:
        case Statistic::CHANNEL_READBACK:
            startTime = LB_MIN(startTime, data.startTime);
            endTime = LB_MAX(endTime, data.endTime);
            break;

        case Statistic::CHANNEL_ASYNC_READBACK:
        case Statistic::CHANNEL_FRAME_TRANSMIT:
            // duration is end - start; the reversed operands produced a
            // negative transmit time which could never win the LB_MAX below
            transmitTime += data.endTime - data.startTime;
            break;
        case Statistic::CHANNEL_FRAME_WAIT_SENDTOKEN:
            // waiting for the token is not counted as transmit work
            transmitTime -= data.endTime - data.startTime;
            break;

        // assemble blocks on input frames, stop using subsequent data
        case Statistic::CHANNEL_ASSEMBLE:
            loadSet = true;
            break;

        default:
            break;
        }
    }

    // no render event seen for this task
    if (startTime == std::numeric_limits<int64_t>::max())
        return;

    LBASSERTINFO(load.missing > 0, load << " for " << channel->getName()
                                        << " " << channel->getSerial());

    const int64_t time = LB_MAX(endTime - startTime, transmitTime);
    load.time += time;
    --load.missing;

    if (load.missing == 0)
    {
        // all channels reported: scale the time by 1/sqrt(nResources)
        const float rTime = float(load.time) / float(load.nResources);
        load.time = int64_t(rTime * sqrtf(float(load.nResources)));
    }

    LBLOG(LOG_LB1) << "Task " << taskID << ", added time " << time << " to "
                   << load << " from " << channel->getName() << " "
                   << channel->getSerial() << std::endl;
}
/**
 * Recursively distribute a viewport or range over the subtree below node.
 *
 * A leaf node (one with a compound) is assigned vp and range directly. For
 * an inner node the share of time for the left child is computed from the
 * ratio of the child's resources to the node's resources. The load data for
 * the node's mode is then swept from the start of the viewport or range
 * until that share is used up, which yields the split position. The split
 * is dampened with the previous split, constrained by the boundary, maximum
 * size and resistance settings of the node, stored in node->split, and both
 * children are processed with their part of the viewport or range.
 *
 * @param node  the (sub)tree to process.
 * @param time  the time budget for the subtree.
 * @param datas the load data sets, indexed by node mode.
 * @param vp    the viewport to distribute.
 * @param range the range to distribute.
 */
void LoadEqualizer::_computeSplit( Node* node, const float time,
                                   LBDatas* datas, const Viewport& vp,
                                   const Range& range )
{
    LBLOG( LOG_LB2 ) << "_computeSplit " << vp << ", " << range << " time "
                     << time << std::endl;
    LBASSERTINFO( vp.isValid(), vp );
    LBASSERTINFO( range.isValid(), range );
    LBASSERTINFO( node->resources > 0.f || !vp.hasArea() || !range.hasData(),
                  "Assigning " << node->resources << " work to viewport "
                               << vp << ", " << range );

    // leaf: hand the region to the compound and stop recursing
    Compound* compound = node->compound;
    if( compound )
    {
        _assign( compound, vp, range );
        return;
    }

    LBASSERT( node->left && node->right );

    // work on a copy; items are erased from it during the sweep
    LBDatas workingSet = datas[ node->mode ];
    const float leftTime = node->resources > 0 ?
                           time * node->left->resources / node->resources : 0.f;
    float timeLeft = LB_MIN( leftTime, time ); // correct for fp rounding error

    switch( node->mode )
    {
        case MODE_VERTICAL:
        {
            LBASSERT( range == Range::ALL );

            float splitPos = vp.x;
            const float end = vp.getXEnd();

            // sweep along X until the left child's time share is consumed
            while( timeLeft > std::numeric_limits< float >::epsilon() &&
                   splitPos < end )
            {
                LBLOG( LOG_LB2 ) << timeLeft << "ms left using "
                                 << workingSet.size() << " tiles" << std::endl;

                // remove all irrelevant items from working set
                for( LBDatas::iterator i = workingSet.begin();
                     i != workingSet.end(); )
                {
                    const Data& data = *i;
                    if( data.vp.getXEnd() > splitPos )
                        ++i;
                    else
                        i = workingSet.erase( i );
                }
                if( workingSet.empty( ))
                    break;

                // find next discontinuity in loads: the closest tile start
                // or end beyond splitPos
                float currentPos = 1.0f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;
                    if( data.vp.x > splitPos && data.vp.x < currentPos )
                        currentPos = data.vp.x;
                    const float xEnd = data.vp.getXEnd();
                    if( xEnd > splitPos && xEnd < currentPos )
                        currentPos = xEnd;
                }

                const float width = currentPos - splitPos;
                LBASSERTINFO( width > 0.f, currentPos << "<=" << splitPos );
                LBASSERT( currentPos <= 1.0f );

                // accumulate normalized load in splitPos...currentPos
                LBLOG( LOG_LB2 ) << "Computing load in X " << splitPos << "..."
                                 << currentPos << std::endl;
                float currentTime = 0.f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;

                    // NOTE(review): the early break presumably relies on the
                    // data being sorted by x - confirm against the caller
                    if( data.vp.x >= currentPos ) // not yet needed data sets
                        break;

                    // clip the tile's height to the viewport being split
                    float yContrib = data.vp.h;

                    if( data.vp.y < vp.y )
                        yContrib -= (vp.y - data.vp.y);

                    const float dataEnd = data.vp.getYEnd();
                    const float vpEnd = vp.getYEnd();
                    if( dataEnd > vpEnd )
                        yContrib -= (dataEnd - vpEnd);

                    if( yContrib > 0.f )
                    {
                        // fraction of the tile's area inside the strip
                        const float percentage = ( width / data.vp.w ) *
                                                 ( yContrib / data.vp.h );
                        currentTime += ( data.time * percentage );

                        LBLOG( LOG_LB2 )
                            << data.vp << " contributes " << yContrib
                            << " in " << vp.h << " (" << percentage
                            << ") with " << data.time << ": "
                            << ( data.time * percentage ) << " vp.y " << vp.y
                            << " dataEnd " << dataEnd << " vpEnd " << vpEnd
                            << std::endl;
                        LBASSERT( percentage < 1.01f )
                    }
                }

                LBLOG( LOG_LB2 ) << splitPos << "..." << currentPos << ": t="
                                 << currentTime << " of " << timeLeft
                                 << std::endl;

                if( currentTime >= timeLeft ) // found last region
                {
                    // interpolate linearly inside the strip
                    splitPos += ( width * timeLeft / currentTime );
                    timeLeft = 0.0f;
                }
                else
                {
                    timeLeft -= currentTime;
                    splitPos = currentPos;
                }
            }

            LBLOG( LOG_LB2 ) << "Should split at X " << splitPos << std::endl;
            // blend the new split with the previous one
            if( getDamping() < 1.f )
                splitPos = (1.f - getDamping()) * splitPos +
                           getDamping() * node->split;
            LBLOG( LOG_LB2 ) << "Dampened split at X " << splitPos
                             << std::endl;

            // There might be more time left due to MIN_PIXEL rounding by parent
            // LBASSERTINFO( timeLeft <= .001f, timeLeft );

            // Ensure minimum size
            const Compound* root = getCompound();
            const float pvpW = static_cast< float >(
                root->getInheritPixelViewport().w );
            // pixel boundary expressed relative to the root viewport width
            const float boundary = static_cast< float >( node->boundary2i.x()) /
                                   pvpW;
            if( node->left->resources == 0.f )
                splitPos = vp.x;
            else if( node->right->resources == 0.f )
                splitPos = end;
            else if( boundary > 0 )
            {
                const float lengthRight = vp.getXEnd() - splitPos;
                const float lengthLeft = splitPos - vp.x;
                const float maxRight = static_cast< float >(
                    node->right->maxSize.x( )) / pvpW;
                const float maxLeft = static_cast< float >(
                    node->left->maxSize.x( )) / pvpW;
                // respect the maximum size of either child
                if( lengthRight > maxRight )
                    splitPos = end - maxRight;
                else if( lengthLeft > maxLeft )
                    splitPos = vp.x + maxLeft;

                // each side is at least one boundary wide
                if( (splitPos - vp.x) < boundary )
                    splitPos = vp.x + boundary;
                if( (end - splitPos) < boundary )
                    splitPos = end - boundary;

                // snap to a multiple of the boundary
                const uint32_t ratio =
                    static_cast< uint32_t >( splitPos / boundary + .5f );
                splitPos = ratio * boundary;
            }

            // keep the split inside the viewport
            splitPos = LB_MAX( splitPos, vp.x );
            splitPos = LB_MIN( splitPos, end);

            // keep the old split if the change is below the resistance
            const float newPixelW = pvpW * splitPos;
            const float oldPixelW = pvpW * node->split;
            if( int( fabs(newPixelW - oldPixelW) ) < node->resistance2i.x( ))
                splitPos = node->split;
            else
                node->split = splitPos;

            LBLOG( LOG_LB2 ) << "Constrained split " << vp << " at X "
                             << splitPos << std::endl;

            // balance children
            Viewport childVP = vp;
            childVP.w = (splitPos - vp.x);
            _computeSplit( node->left, leftTime, datas, childVP, range );

            childVP.x = childVP.getXEnd();
            childVP.w = end - childVP.x;

            // Fix 2994111: Rounding errors with 2D LB and 16 sources
            //   Floating point rounding may create a width for the 'right'
            //   child which is slightly below the parent width. Correct it.
            while( childVP.getXEnd() < end )
                childVP.w += std::numeric_limits< float >::epsilon();

            _computeSplit( node->right, time-leftTime, datas, childVP, range );
            break;
        }

        case MODE_HORIZONTAL:
        {
            LBASSERT( range == Range::ALL );
            float splitPos = vp.y;
            const float end = vp.getYEnd();

            // sweep along Y until the left child's time share is consumed
            while( timeLeft > std::numeric_limits< float >::epsilon() &&
                   splitPos < end )
            {
                LBLOG( LOG_LB2 ) << timeLeft << "ms left using "
                                 << workingSet.size() << " tiles" << std::endl;

                // remove all irrelevant items from working set
                for( LBDatas::iterator i = workingSet.begin();
                     i != workingSet.end(); )
                {
                    const Data& data = *i;
                    if( data.vp.getYEnd() > splitPos )
                        ++i;
                    else
                        i = workingSet.erase( i );
                }
                if( workingSet.empty( ))
                    break;

                // find next discontinuity in loads: the closest tile start
                // or end beyond splitPos
                float currentPos = 1.0f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;
                    if( data.vp.y > splitPos && data.vp.y < currentPos )
                        currentPos = data.vp.y;
                    const float yEnd = data.vp.getYEnd();
                    if( yEnd > splitPos && yEnd < currentPos )
                        currentPos = yEnd;
                }

                const float height = currentPos - splitPos;
                LBASSERTINFO( height > 0.f, currentPos << "<=" << splitPos );
                LBASSERT( currentPos <= 1.0f );

                // accumulate normalized load in splitPos...currentPos
                LBLOG( LOG_LB2 ) << "Computing load in Y " << splitPos << "..."
                                 << currentPos << std::endl;
                float currentTime = 0.f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;

                    // NOTE(review): the early break presumably relies on the
                    // data being sorted by y - confirm against the caller
                    if( data.vp.y >= currentPos ) // not yet needed data sets
                        break;

                    // clip the tile's width to the viewport being split
                    float xContrib = data.vp.w;

                    if( data.vp.x < vp.x )
                        xContrib -= (vp.x - data.vp.x);

                    const float dataEnd = data.vp.getXEnd();
                    const float vpEnd = vp.getXEnd();
                    if( dataEnd > vpEnd )
                        xContrib -= (dataEnd - vpEnd);

                    if( xContrib > 0.f )
                    {
                        // fraction of the tile's area inside the strip
                        const float percentage = ( height / data.vp.h ) *
                                                 ( xContrib / data.vp.w );
                        currentTime += ( data.time * percentage );

                        LBLOG( LOG_LB2 )
                            << data.vp << " contributes " << xContrib
                            << " in " << vp.w << " (" << percentage
                            << ") with " << data.time << ": "
                            << ( data.time * percentage ) << " total "
                            << currentTime << " vp.x " << vp.x << " dataEnd "
                            << dataEnd << " vpEnd " << vpEnd << std::endl;
                        LBASSERT( percentage < 1.01f )
                    }
                }

                LBLOG( LOG_LB2 ) << splitPos << "..." << currentPos << ": t="
                                 << currentTime << " of " << timeLeft
                                 << std::endl;

                if( currentTime >= timeLeft ) // found last region
                {
                    // interpolate linearly inside the strip
                    splitPos += (height * timeLeft / currentTime );
                    timeLeft = 0.0f;
                }
                else
                {
                    timeLeft -= currentTime;
                    splitPos = currentPos;
                }
            }

            LBLOG( LOG_LB2 ) << "Should split at Y " << splitPos << std::endl;
            // blend the new split with the previous one
            if( getDamping() < 1.f )
                splitPos = (1.f - getDamping( )) * splitPos +
                           getDamping() * node->split;
            LBLOG( LOG_LB2 ) << "Dampened split at Y " << splitPos
                             << std::endl;

            const Compound* root = getCompound();

            const float pvpH = static_cast< float >(
                root->getInheritPixelViewport().h );
            // pixel boundary expressed relative to the root viewport height
            const float boundary = static_cast< float >(node->boundary2i.y( )) /
                                   pvpH;

            if( node->left->resources == 0.f )
                splitPos = vp.y;
            else if( node->right->resources == 0.f )
                splitPos = end;
            else if ( boundary > 0 )
            {
                const float lengthRight = vp.getYEnd() - splitPos;
                const float lengthLeft = splitPos - vp.y;
                const float maxRight = static_cast< float >(
                    node->right->maxSize.y( )) / pvpH;
                const float maxLeft = static_cast< float >(
                    node->left->maxSize.y( )) / pvpH;
                // respect the maximum size of either child
                if( lengthRight > maxRight )
                    splitPos = end - maxRight;
                else if( lengthLeft > maxLeft )
                    splitPos = vp.y + maxLeft;

                // each side is at least one boundary high
                if( (splitPos - vp.y) < boundary )
                    splitPos = vp.y + boundary;
                if( (end - splitPos) < boundary )
                    splitPos = end - boundary;

                // snap to a multiple of the boundary
                const uint32_t ratio =
                    static_cast< uint32_t >( splitPos / boundary + .5f );
                splitPos = ratio * boundary;
            }

            // keep the split inside the viewport
            splitPos = LB_MAX( splitPos, vp.y );
            splitPos = LB_MIN( splitPos, end );

            // keep the old split if the change is below the resistance
            const float newPixelH = pvpH * splitPos;
            const float oldPixelH = pvpH * node->split;
            if( int( fabs(newPixelH - oldPixelH) ) < node->resistance2i.y( ))
                splitPos = node->split;
            else
                node->split = splitPos;

            LBLOG( LOG_LB2 ) << "Constrained split " << vp << " at Y "
                             << splitPos << std::endl;

            // balance children
            Viewport childVP = vp;
            childVP.h = (splitPos - vp.y);
            _computeSplit( node->left, leftTime, datas, childVP, range );

            childVP.y = childVP.getYEnd();
            childVP.h = end - childVP.y;
            // correct a height slightly below the parent due to fp rounding
            while( childVP.getYEnd() < end )
                childVP.h += std::numeric_limits< float >::epsilon();

            _computeSplit( node->right, time - leftTime, datas, childVP, range);
            break;
        }

        case MODE_DB:
        {
            LBASSERT( vp == Viewport::FULL );
            float splitPos = range.start;
            const float end = range.end;

            // sweep along the range until the left child's share is consumed
            while( timeLeft > std::numeric_limits< float >::epsilon() &&
                   splitPos < end )
            {
                LBLOG( LOG_LB2 ) << timeLeft << "ms left using "
                                 << workingSet.size() << " tiles" << std::endl;

                // remove all irrelevant items from working set
                for( LBDatas::iterator i = workingSet.begin();
                     i != workingSet.end(); )
                {
                    const Data& data = *i;
                    if( data.range.end > splitPos )
                        ++i;
                    else
                        i = workingSet.erase( i );
                }
                if( workingSet.empty( ))
                    break;

                // find next discontinuity in loads: the smallest range end
                float currentPos = 1.0f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;
                    currentPos = LB_MIN( currentPos, data.range.end );
                }

                const float size = currentPos - splitPos;
                LBASSERTINFO( size > 0.f, currentPos << "<=" << splitPos );
                LBASSERT( currentPos <= 1.0f );

                // accumulate normalized load in splitPos...currentPos
                LBLOG( LOG_LB2 ) << "Computing load in range " << splitPos
                                 << "..." << currentPos << std::endl;
                float currentTime = 0.f;
                for( LBDatas::const_iterator i = workingSet.begin();
                     i != workingSet.end(); ++i )
                {
                    const Data& data = *i;

                    // NOTE(review): the early break presumably relies on the
                    // data being sorted by range start - confirm
                    if( data.range.start >= currentPos ) // not yet needed data
                        break;
#if 0
                    // make sure we cover full area
                    LBASSERTINFO( data.range.start <= splitPos,
                                  data.range.start << " > " << splitPos );
                    LBASSERTINFO( data.range.end >= currentPos,
                                  data.range.end << " < " << currentPos);
#endif
                    currentTime += data.time * size / data.range.getSize();
                }

                LBLOG( LOG_LB2 ) << splitPos << "..." << currentPos << ": t="
                                 << currentTime << " of " << timeLeft
                                 << std::endl;

                if( currentTime >= timeLeft ) // found last region
                {
                    // interpolate linearly inside the interval
                    const float width = currentPos - splitPos;
                    splitPos += (width * timeLeft / currentTime );
                    timeLeft = 0.0f;
                }
                else
                {
                    timeLeft -= currentTime;
                    splitPos = currentPos;
                }
            }
            LBLOG( LOG_LB2 ) << "Should split at " << splitPos << std::endl;
            // blend the new split with the previous one
            if( getDamping() < 1.f )
                splitPos = (1.f - getDamping( )) * splitPos +
                           getDamping() * node->split;
            LBLOG( LOG_LB2 ) << "Dampened split at " << splitPos << std::endl;

            const float boundary( node->boundaryf );
            if( node->left->resources == 0.f )
                splitPos = range.start;
            else if( node->right->resources == 0.f )
                splitPos = end;

            // snap to a multiple of the boundary
            // NOTE(review): unlike the 2D modes there is no 'boundary > 0'
            // guard before this division - confirm boundaryf is never 0
            const uint32_t ratio = static_cast< uint32_t >
                                   ( splitPos / boundary + .5f );
            splitPos = ratio * boundary;
            // a side smaller than one boundary collapses to nothing
            if( (splitPos - range.start) < boundary )
                splitPos = range.start;
            if( (end - splitPos) < boundary )
                splitPos = end;

            // keep the old split if the change is below the resistance
            if( fabs( splitPos - node->split ) < node->resistancef )
                splitPos = node->split;
            else
                node->split = splitPos;

            LBLOG( LOG_LB2 ) << "Constrained split " << range << " at pos "
                             << splitPos << std::endl;

            // balance children
            Range childRange = range;
            childRange.end = splitPos;
            _computeSplit( node->left, leftTime, datas, vp, childRange );

            childRange.start = childRange.end;
            childRange.end = range.end;
            _computeSplit( node->right, time - leftTime, datas, vp, childRange);
            break;
        }

        default:
            LBUNIMPLEMENTED;
    }
}
/**
 * Initialise an inner tree node: split the given viewport/range in half
 * between the two children, update both subtrees, then derive this node's
 * resources, maximum size, boundaries and resistances from the children.
 *
 * @param node the inner node to update; both children must be set.
 * @param vp the viewport covered by this node.
 * @param range the DB range covered by this node.
 */
void LoadEqualizer::_updateNode( Node* node, const Viewport& vp,
                                 const Range& range )
{
    Node* const left = node->left;
    Node* const right = node->right;
    LBASSERT( left );
    LBASSERT( right );

    // Both children start with the parent's area; the split dimension is
    // halved below according to the node mode.
    Range leftRange = range;
    Range rightRange = range;
    Viewport leftVP = vp;
    Viewport rightVP = vp;

    switch( node->mode )
    {
      default:
        LBUNIMPLEMENTED;
        // no break: execution continues into the vertical case

      case MODE_VERTICAL:
        leftVP.w = vp.w * .5f;
        node->split = leftVP.getXEnd();
        rightVP.x = leftVP.getXEnd();
        rightVP.w = vp.getXEnd() - rightVP.x;
        break;

      case MODE_HORIZONTAL:
        leftVP.h = vp.h * .5f;
        node->split = leftVP.getYEnd();
        rightVP.y = leftVP.getYEnd();
        rightVP.h = vp.getYEnd() - rightVP.y;
        break;

      case MODE_DB:
        leftRange.end = range.start + ( range.end - range.start ) * .5f;
        node->split = leftRange.end;
        rightRange.start = leftRange.end;
        break;
    }

    _update( left, leftVP, leftRange );
    _update( right, rightVP, rightRange );

    node->resources = left->resources + right->resources;

    // If one child has no resources, this node simply mirrors the other
    // child's constraints (the left child is checked first).
    const bool leftIdle = ( left->resources == 0.f );
    const bool rightIdle = ( right->resources == 0.f );
    if( leftIdle || rightIdle )
    {
        const Node* const active = leftIdle ? right : left;
        node->maxSize = active->maxSize;
        node->boundary2i = active->boundary2i;
        node->boundaryf = active->boundaryf;
        node->resistance2i = active->resistance2i;
        node->resistancef = active->resistancef;
        return;
    }

    // Both children contribute: sum along the split axis, take the min/max
    // across the other axis.
    switch( node->mode )
    {
      case MODE_VERTICAL:
        node->maxSize.x() = left->maxSize.x() + right->maxSize.x();
        node->maxSize.y() = LB_MIN( left->maxSize.y(), right->maxSize.y());

        node->boundary2i.x() = left->boundary2i.x() + right->boundary2i.x();
        node->boundary2i.y() = LB_MAX( left->boundary2i.y(),
                                       right->boundary2i.y());
        node->boundaryf = LB_MAX( left->boundaryf, right->boundaryf );

        node->resistance2i.x() = LB_MAX( left->resistance2i.x(),
                                         right->resistance2i.x( ));
        node->resistance2i.y() = LB_MAX( left->resistance2i.y(),
                                         right->resistance2i.y());
        node->resistancef = LB_MAX( left->resistancef, right->resistancef );
        break;

      case MODE_HORIZONTAL:
        node->maxSize.x() = LB_MIN( left->maxSize.x(), right->maxSize.x());
        node->maxSize.y() = left->maxSize.y() + right->maxSize.y();

        node->boundary2i.x() = LB_MAX( left->boundary2i.x(),
                                       right->boundary2i.x() );
        node->boundary2i.y() = left->boundary2i.y() + right->boundary2i.y();
        node->boundaryf = LB_MAX( left->boundaryf, right->boundaryf );

        node->resistance2i.x() = LB_MAX( left->resistance2i.x(),
                                         right->resistance2i.x() );
        node->resistance2i.y() = LB_MAX( left->resistance2i.y(),
                                         right->resistance2i.y( ));
        node->resistancef = LB_MAX( left->resistancef, right->resistancef );
        break;

      case MODE_DB:
        // maxSize is not touched in DB mode
        node->boundary2i.x() = LB_MAX( left->boundary2i.x(),
                                       right->boundary2i.x() );
        node->boundary2i.y() = LB_MAX( left->boundary2i.y(),
                                       right->boundary2i.y() );
        node->boundaryf = left->boundaryf + right->boundaryf;

        node->resistance2i.x() = LB_MAX( left->resistance2i.x(),
                                         right->resistance2i.x() );
        node->resistance2i.y() = LB_MAX( left->resistance2i.y(),
                                         right->resistance2i.y() );
        node->resistancef = LB_MAX( left->resistancef, right->resistancef );
        break;

      default:
        LBUNIMPLEMENTED;
    }
}
/**
 * Feed the statistics of a finished frame back into the load history.
 *
 * Looks up the history entry for frameNumber and, within it, the first data
 * item belonging to the given channel, then stores the measured time and the
 * region of interest on that item.
 *
 * @param channel the channel which produced the statistics.
 * @param frameNumber the frame the statistics belong to.
 * @param statistics the samples gathered for the channel.
 * @param region the region applied to the item's viewport.
 */
void LoadEqualizer::notifyLoadData( Channel* channel,
                                    const uint32_t frameNumber,
                                    const Statistics& statistics,
                                    const Viewport& region )
{
    LBLOG( LOG_LB2 ) << statistics.size() << " samples from "
                     << channel->getName() << " @ " << frameNumber
                     << std::endl;

    typedef std::deque< LBFrameData >::iterator HistoryIter;
    for( HistoryIter frame = _history.begin(); frame != _history.end();
         ++frame )
    {
        if( frame->first != frameNumber )
            continue;

        // Historical data set of this frame
        LBDatas& items = frame->second;
        for( LBDatas::iterator item = items.begin(); item != items.end();
             ++item )
        {
            Data& data = *item;
            if( data.channel != channel )
                continue;

            // Historical data item of this channel
            const uint32_t taskID = data.taskID;
            LBASSERTINFO( taskID > 0, channel->getName( ));

            // Sentinel: stays untouched if no render sample is found
            const int64_t noStart = std::numeric_limits< int64_t >::max();
            int64_t startTime = noStart;
            int64_t endTime = 0;
            int64_t transmitTime = 0;
            bool loadSet = false;

            for( size_t k = 0; k < statistics.size(); ++k )
            {
                const Statistic& stat = statistics[ k ];

                // Assemble samples may come from a different compound
                if( stat.task == data.destTaskID )
                    _updateAssembleTime( data, stat );

                if( loadSet || stat.task != taskID )
                    continue;

                if( stat.type == Statistic::CHANNEL_CLEAR ||
                    stat.type == Statistic::CHANNEL_DRAW ||
                    stat.type == Statistic::CHANNEL_READBACK )
                {
                    startTime = LB_MIN( startTime, stat.startTime );
                    endTime = LB_MAX( endTime, stat.endTime );
                }
                else if( stat.type == Statistic::CHANNEL_ASYNC_READBACK ||
                         stat.type == Statistic::CHANNEL_FRAME_TRANSMIT )
                {
                    transmitTime += stat.endTime - stat.startTime;
                }
                else if( stat.type ==
                         Statistic::CHANNEL_FRAME_WAIT_SENDTOKEN )
                {
                    // Time spent waiting for the send token is subtracted
                    // from the transmit time
                    transmitTime -= stat.endTime - stat.startTime;
                }
                else if( stat.type == Statistic::CHANNEL_ASSEMBLE )
                {
                    // Assemble blocks on input frames: ignore all
                    // subsequent samples of this task
                    loadSet = true;
                }
            }

            if( startTime == noStart )
                return; // no clear/draw/readback sample for this task

            data.vp.apply( region ); // Update ROI
            data.time = endTime - startTime;
            data.time = LB_MAX( data.time, 1 );
            data.time = LB_MAX( data.time, transmitTime );
            data.assembleTime = LB_MAX( data.assembleTime, 0 );

            LBLOG( LOG_LB2 ) << "Added time " << data.time << " (+"
                             << data.assembleTime << ") for "
                             << channel->getName() << " " << data.vp << ", "
                             << data.range << " @ " << frameNumber
                             << std::endl;

            // Note: if the same channel is used twice as a child, the
            // load-compound association does not work.
            return;
        }
    }
}
/**
 * Recursively distribute a viewport and range over the subtree of a node.
 *
 * For a leaf (a node with a compound) the viewport and range are set on the
 * compound. For an inner node the area is split according to the node's mode
 * and relative split position, constrained by the children's resources,
 * maximum sizes and the boundary, and both children are assigned their part.
 * The constrained split is written back to node->split.
 *
 * @param node the subtree root to assign work to.
 * @param vp the viewport to distribute.
 * @param range the DB range to distribute.
 */
void TreeEqualizer::_assign( Node* node, const Viewport& vp,
                             const Range& range )
{
    LBLOG( LOG_LB2 ) << "assign " << vp << ", " << range << " time "
                     << node->time << " split " << node->split << std::endl;
    LBASSERTINFO( vp.isValid(), vp );
    LBASSERTINFO( range.isValid(), range );
    LBASSERTINFO( node->resources > 0.f || !vp.hasArea() || !range.hasData(),
                  "Assigning work to unused compound: " << vp << ", " << range);

    Compound* compound = node->compound;
    if( compound )
    {
        LBASSERTINFO( vp == Viewport::FULL || range == Range::ALL,
                      "Mixed 2D/DB load-balancing not implemented" );

        compound->setViewport( vp );
        compound->setRange( range );
        LBLOG( LOG_LB2 ) << compound->getChannel()->getName() << " set " << vp
                         << ", " << range << std::endl;
        return;
    }

    switch( node->mode )
    {
      case MODE_VERTICAL:
      {
          // Ensure minimum size
          const Compound* root = getCompound();
          const float pvpW = float( root->getInheritPixelViewport().w );
          const float end = vp.getXEnd();
          const float boundary = float( node->boundary2i.x( )) / pvpW;
          float absoluteSplit = vp.x + vp.w * node->split;

          if( node->left->resources == 0.f )
              absoluteSplit = vp.x;
          else if( node->right->resources == 0.f )
              absoluteSplit = end;
          else if( boundary > 0 )
          {
              const float right = vp.getXEnd() - absoluteSplit;
              const float left = absoluteSplit - vp.x;
              const float maxRight = float( node->right->maxSize.x( )) / pvpW;
              const float maxLeft = float( node->left->maxSize.x( )) / pvpW;

              if( right > maxRight )
                  absoluteSplit = end - maxRight;
              else if( left > maxLeft )
                  absoluteSplit = vp.x + maxLeft;

              if( (absoluteSplit - vp.x) < boundary )
                  absoluteSplit = vp.x + boundary;
              if( (end - absoluteSplit) < boundary )
                  absoluteSplit = end - boundary;

              // Snap to the nearest multiple of the boundary
              const uint32_t ratio = uint32_t( absoluteSplit / boundary + .5f );
              absoluteSplit = ratio * boundary;
          }

          absoluteSplit = LB_MAX( absoluteSplit, vp.x );
          absoluteSplit = LB_MIN( absoluteSplit, end);

          node->split = (absoluteSplit - vp.x ) / vp.w;
          LBLOG( LOG_LB2 ) << "Constrained split " << vp << " at X "
                           << node->split << std::endl;

          // traverse children
          Viewport childVP = vp;
          childVP.w = (absoluteSplit - vp.x);
          _assign( node->left, childVP, range );

          childVP.x = childVP.getXEnd();
          childVP.w = end - childVP.x;

          // Fix 2994111: Rounding errors with 2D LB and 16 sources
          //   Floating point rounding may create a width for the 'right'
          //   child which is slightly below the parent width. Correct it.
          while( childVP.getXEnd() < end )
              childVP.w += std::numeric_limits< float >::epsilon();

          _assign( node->right, childVP, range );
          break;
      }

      case MODE_HORIZONTAL:
      {
          // Ensure minimum size
          const Compound* root = getCompound();
          const float pvpH = float( root->getInheritPixelViewport().h );
          const float end = vp.getYEnd();
          const float boundary = float( node->boundary2i.y( )) / pvpH;
          float absoluteSplit = vp.y + vp.h * node->split;

          if( node->left->resources == 0.f )
              absoluteSplit = vp.y;
          else if( node->right->resources == 0.f )
              absoluteSplit = end;
          else if( boundary > 0 )
          {
              const float right = vp.getYEnd() - absoluteSplit;
              const float left = absoluteSplit - vp.y;
              const float maxRight = float( node->right->maxSize.y( )) / pvpH;
              const float maxLeft = float( node->left->maxSize.y( )) / pvpH;

              if( right > maxRight )
                  absoluteSplit = end - maxRight;
              else if( left > maxLeft )
                  absoluteSplit = vp.y + maxLeft;

              if( (absoluteSplit - vp.y) < boundary )
                  absoluteSplit = vp.y + boundary;
              if( (end - absoluteSplit) < boundary )
                  absoluteSplit = end - boundary;

              // Snap to the nearest multiple of the boundary
              const uint32_t ratio = uint32_t( absoluteSplit / boundary + .5f );
              absoluteSplit = ratio * boundary;
          }

          absoluteSplit = LB_MAX( absoluteSplit, vp.y );
          absoluteSplit = LB_MIN( absoluteSplit, end);

          node->split = (absoluteSplit - vp.y ) / vp.h;
          // This is the Y split; the message previously said "at X"
          LBLOG( LOG_LB2 ) << "Constrained split " << vp << " at Y "
                           << node->split << std::endl;

          // traverse children
          Viewport childVP = vp;
          childVP.h = (absoluteSplit - vp.y);
          _assign( node->left, childVP, range );

          childVP.y = childVP.getYEnd();
          childVP.h = end - childVP.y;

          // Fix 2994111: Rounding errors with 2D LB and 16 sources
          //   Floating point rounding may create a height for the 'right'
          //   child which is slightly below the parent height. Correct it.
          while( childVP.getYEnd() < end )
              childVP.h += std::numeric_limits< float >::epsilon();

          _assign( node->right, childVP, range );
          break;
      }

      case MODE_DB:
      {
          LBASSERT( vp == Viewport::FULL );
          const float end = range.end;
          float absoluteSplit = range.start +
                                (range.end-range.start)*node->split;
          const float boundary( node->boundaryf );

          if( node->left->resources == 0.f )
              absoluteSplit = range.start;
          else if( node->right->resources == 0.f )
              absoluteSplit = end;

          // Snap to the nearest multiple of the boundary. Guarded like the
          // 2D modes: a zero boundary would divide by zero and make the
          // float -> uint32_t conversion undefined.
          if( boundary > 0.f )
          {
              const uint32_t ratio = uint32_t( absoluteSplit / boundary + .5f );
              absoluteSplit = ratio * boundary;
          }

          if( (absoluteSplit - range.start) < boundary )
              absoluteSplit = range.start;
          if( (end - absoluteSplit) < boundary )
              absoluteSplit = end;

          node->split = (absoluteSplit-range.start) / (range.end-range.start);
          LBLOG( LOG_LB2 ) << "Constrained split " << range << " at pos "
                           << node->split << std::endl;

          Range childRange = range;
          childRange.end = absoluteSplit;
          _assign( node->left, vp, childRange );

          childRange.start = childRange.end;
          childRange.end = range.end;
          _assign( node->right, vp, childRange);
          break;
      }

      default:
        LBUNIMPLEMENTED;
    }
}
/**
 * Recursively refresh the per-node constraints of the tree.
 *
 * Leaves take their resources from the compound's usage (0 if inactive),
 * their maximum size from the channel's pixel viewport and their boundaries
 * from the equalizer settings. Inner nodes combine the values of their two
 * children according to the node's mode.
 *
 * @param node the subtree root to update; a null node is ignored.
 */
void TreeEqualizer::_update( Node* node )
{
    if( !node )
        return;

    const Compound* compound = node->compound;
    if( compound )
    {
        const Channel* channel = compound->getChannel();
        // Check the channel before it is dereferenced below
        LBASSERT( channel );
        const PixelViewport& pvp = channel->getPixelViewport();

        node->resources = compound->isActive() ? compound->getUsage() : 0.f;
        node->maxSize.x() = pvp.w;
        node->maxSize.y() = pvp.h;
        node->boundaryf = _boundaryf;
        node->boundary2i = _boundary2i;
        return;
    }
    // else

    LBASSERT( node->left );
    LBASSERT( node->right );

    _update( node->left );
    _update( node->right );

    node->resources = node->left->resources + node->right->resources;

    if( node->left->resources == 0.f )
    {
        // Only the right child is used: mirror its values
        node->maxSize    = node->right->maxSize;
        node->boundary2i = node->right->boundary2i;
        node->boundaryf  = node->right->boundaryf;
        node->time       = node->right->time;
    }
    else if( node->right->resources == 0.f )
    {
        // Only the left child is used: mirror its values
        node->maxSize    = node->left->maxSize;
        node->boundary2i = node->left->boundary2i;
        node->boundaryf  = node->left->boundaryf;
        node->time       = node->left->time;
    }
    else
    {
        // Sum along the split axis, min/max across the other one
        switch( node->mode )
        {
          case MODE_VERTICAL:
            node->maxSize.x() = node->left->maxSize.x() +
                                node->right->maxSize.x();
            node->maxSize.y() = LB_MIN( node->left->maxSize.y(),
                                        node->right->maxSize.y() );
            node->boundary2i.x() = node->left->boundary2i.x() +
                                   node->right->boundary2i.x();
            node->boundary2i.y() = LB_MAX( node->left->boundary2i.y(),
                                           node->right->boundary2i.y());
            node->boundaryf = LB_MAX( node->left->boundaryf,
                                      node->right->boundaryf );
            break;

          case MODE_HORIZONTAL:
            node->maxSize.x() = LB_MIN( node->left->maxSize.x(),
                                        node->right->maxSize.x() );
            node->maxSize.y() = node->left->maxSize.y() +
                                node->right->maxSize.y();
            node->boundary2i.x() = LB_MAX( node->left->boundary2i.x(),
                                           node->right->boundary2i.x() );
            node->boundary2i.y() = node->left->boundary2i.y() +
                                   node->right->boundary2i.y();
            node->boundaryf = LB_MAX( node->left->boundaryf,
                                      node->right->boundaryf );
            break;

          case MODE_DB:
            // maxSize is not touched in DB mode
            node->boundary2i.x() = LB_MAX( node->left->boundary2i.x(),
                                           node->right->boundary2i.x() );
            node->boundary2i.y() = LB_MAX( node->left->boundary2i.y(),
                                           node->right->boundary2i.y() );
            node->boundaryf = node->left->boundaryf +node->right->boundaryf;
            break;

          default:
            LBUNIMPLEMENTED;
        }

        node->time = node->left->time + node->right->time;
    }
}
int64_t SocketConnection::write( const void* buffer, const uint64_t bytes ) { if( !isConnected() || _writeFD == INVALID_SOCKET ) return -1; DWORD wrote; WSABUF wsaBuffer = { LB_MIN( bytes, 65535 ), const_cast<char*>( static_cast< const char* >( buffer )) }; ResetEvent( _overlappedWrite.hEvent ); if( WSASend(_writeFD, &wsaBuffer, 1, &wrote, 0, &_overlappedWrite, 0 ) == 0 ) // ok return wrote; if( WSAGetLastError() != WSA_IO_PENDING ) return -1; const DWORD err = WaitForSingleObject( _overlappedWrite.hEvent, INFINITE ); switch( err ) { case WAIT_FAILED: case WAIT_ABANDONED: { LBWARN << "Write error" << lunchbox::sysError << std::endl; return -1; } default: LBWARN << "Unhandled write error " << err << ": " << lunchbox::sysError << std::endl; // no break; case WAIT_OBJECT_0: break; } DWORD got = 0; DWORD flags = 0; if( WSAGetOverlappedResult( _writeFD, &_overlappedWrite, &got, false, &flags )) { return got; } switch( WSAGetLastError() ) { case WSA_IO_INCOMPLETE: throw Exception( Exception::TIMEOUT_WRITE ); default: { LBWARN << "Write error : " << lunchbox::sysError << std::endl; return -1; } } LBUNREACHABLE; return -1; }
int64_t IBInterface::postRdmaWrite( const void* buffer, uint32_t numBytes ) { #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clock; clock.reset(); #endif ib_api_status_t ibStatus; ib_wc_t wc; ib_wc_t *wcDone,*wcFree; wcFree = &wc; wcFree->p_next = 0; wcDone = 0; #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clockWait; clockWait.reset(); #endif // validation of the send job do { ibStatus = ib_poll_cq( _completionQueue->getWriteHandle(), &wcFree, &wcDone ); if ( ibStatus == IB_SUCCESS ) { if ( wcDone->status != IB_WCS_SUCCESS ) { LBWARN << "ERROR IN POLL WRITE"<< std::endl; return -1; } _writePoll.getData()[wcDone->wr_id] = true; wcFree = wcDone; wcFree->p_next = 0; wcDone = 0; } else if ( !_writePoll.getData()[ _numBufWrite ] ) { ibStatus = IB_SUCCESS; } } while ( ibStatus == IB_SUCCESS ); #ifdef EQ_MEASURE_TIME _timeTotalWriteWait += clockWait.getTimef(); #endif uint32_t incBytes = 0; uint32_t compt = 0; uint32_t size; size = LB_MIN( numBytes, EQ_MAXBLOCKBUFFER ); ib_local_ds_t list; #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clockCopy; clockCopy.reset(); #endif //memcpy( _writeBlocks[ _numBufWrite ]->buf.getData(), // buffer , size ); eq::lunchbox::fastCopy( _writeBlocks[ _numBufWrite ]->buf.getData(), buffer , size ); list.vaddr = _writeBlocks[ _numBufWrite ]->getVaddr(); #ifdef EQ_MEASURE_TIME _timeCopyBufferWrite += clockCopy.getTimef(); #endif list.lkey = _writeBlocks[_numBufWrite ]->getLocalKey(); list.length = size; // A 64-bit work request identifier that is returned to the consumer // as part of the work completion. _wr.wr_id = _numBufWrite; // A reference to an array of local data segments used by the send // operation. _wr.ds_array = &list; // Number of local data segments specified by this work request. _wr.num_ds = 1; // The type of work request being submitted to the send queue. _wr.wr_type = WR_SEND; // A pointer used to chain work requests together. This permits multiple // work requests to be posted to a queue pair through a single function // call. 
This value is set to NULL to mark the end of the chain. _wr.p_next = 0; // This routine posts a work request to the send queue of a queue pair ibStatus = ib_post_send( _queuePair, &_wr, 0 ); if ( ibStatus != IB_SUCCESS ) { LBWARN << "ERROR IN POST SEND DATA"<< std::endl; return -1; } _writePoll.getData()[ _numBufWrite ] = false; if ( _numBufWrite == EQ_NUMBLOCKMEMORY -1 ) _ibConnection->incWriteInterface(); _numBufWrite = ( _numBufWrite + 1 ) % EQ_NUMBLOCKMEMORY; #ifdef EQ_MEASURE_TIME _timeTotalWrite += clock.getTimef(); #endif return size; }
int64_t IBInterface::readSync( void* buffer, uint32_t bytes ) { #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clock; clock.reset(); #endif uint32_t comptRead = 0; int64_t sizebuf = _readPoll.getData()[ _numBufRead ]; // if no data in buffer, we ask for a receive operation while ( sizebuf < 1 ) #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clockWait; clockWait.reset(); #endif sizebuf = _waitPollCQ( _numBufRead ); #ifdef EQ_MEASURE_TIME _timeTotalWaitPoll += clockWait.getTimef(); #endif if ( sizebuf > _posReadInBuffer ) { #ifdef EQ_MEASURE_TIME eq::lunchbox::Clock clockCopy; clockCopy.reset(); #endif // find a better memcpy or a system that we don't need to use memcpy // copy buffer uint32_t nbRead = LB_MIN( bytes, sizebuf - _posReadInBuffer); /* memcpy(reinterpret_cast<char*>( buffer ) + comptRead, reinterpret_cast<char*> ( _readBlocks[_numBufRead]->buf.getData()) + _posReadInBuffer, nbRead );*/ eq::lunchbox::fastCopy( reinterpret_cast<char*>( buffer ) + comptRead, reinterpret_cast<char*> ( _readBlocks[_numBufRead]->buf.getData() ) + _posReadInBuffer, nbRead ); _posReadInBuffer += nbRead; #ifdef EQ_MEASURE_TIME _timeCopyBufferRead += clockCopy.getTimef(); #endif // all buffer has been taken if ( _posReadInBuffer == sizebuf ) { _completionQueue->removeEventRead(); // init var for next read _posReadInBuffer = 0; _readPoll.getData()[ _numBufRead ] = 0; // next Read in on the next CQ if (_numBufRead == EQ_NUMBLOCKMEMORY-1 ) _ibConnection->incReadInterface(); // notify that read is finnish _ibPostRecv( _numBufRead ); // To do work with more buffer in a CQ _numBufRead = ( _numBufRead + 1 ) % EQ_NUMBLOCKMEMORY; } return nbRead; } LBWARN << "ERROR IN READ SYNC"<< std::endl; return -1; #ifdef EQ_MEASURE_TIME _floatTimeReadSync += clock.getTimef(); #endif }