/**
 * Process pending commands from the command queue. While paused, blocks until
 * the next command arrives. Returns true if the EXIT command was received and
 * the loader should terminate.
 */
bool GPUAsyncLoader::_processCommands( bool& sleepWait )
{
    GPUCommand gpuCommand;
    bool paused = false;
    while( paused || _commands.tryPop( gpuCommand ))
    {
        if( paused ) // while paused, block until the next command arrives
            gpuCommand = _commands.pop();

        switch( gpuCommand.type )
        {
            case GPUCommand::PAUSE:
                paused = true;
                break;

            case GPUCommand::PAUSE_AND_REPORT:
                paused = true;
                _loadResponds.push( GPULoadRespond( GPULoadRequest(),
                                                    GPULoadStatus::PAUSED ));
                break;

            case GPUCommand::RESUME:
                paused = false;
                sleepWait = false;
                break;

            case GPUCommand::UPDATE:
                _update3DTexture();
                break;

            case GPUCommand::EXIT:
                LBINFO << "Exiting GPU fetcher." << std::endl;
                LBINFO << "++==++==++==++==++==++==++==++==++==++==++==++==++==++==++==++==++==++==++=="
                       << std::endl;
                cleanup();
                return true;

            default:
                LBERROR << "Unknown command to gpuAsyncLoader: "
                        << static_cast<uint>( gpuCommand.type ) << std::endl;
        }
    }
    return false;
}
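/*
 * Illustrative sketch (not part of the build): how a control thread might drive
 * the command queue handled above. The pause/resume protocol and the
 * PAUSE_AND_REPORT acknowledgement follow the switch above; the blocking pop on
 * the respond queue, the `status` member and direct access to `_loadResponds`
 * are assumptions made for this example only.
 *
 *   GPUAsyncLoader* loader = ...;
 *   loader->postCommand( GPUCommand::PAUSE_AND_REPORT );  // ask the loader to pause
 *   GPULoadRespond respond = loader->_loadResponds.pop(); // assumed blocking pop
 *   LBASSERT( respond.status == GPULoadStatus::PAUSED );  // loader confirmed the pause
 *   // ... safely modify shared GPU cache state here ...
 *   loader->postCommand( GPUCommand::RESUME );            // let the loader continue
 */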
/**
 * Creates and holds the shared context.
 * Periodically generates and uploads new textures, sleeping between checks.
 */
void GPUAsyncLoader::runLocal()
{
    LBASSERT( !_storageTexture3D );
    EQ_GL_CALL( glGenTextures( 1, &_storageTexture3D ));

    // confirm successful initialization
    _loadResponds.push( GPULoadRespond( GPULoadRequest(),
                                        GPULoadStatus::INITIALIZED ));

    // start fresh in paused state
    postCommand( GPUCommand::PAUSE );

    std::string name = std::string( "GPU_Loader " ).append( strUtil::toString<>( this ));
    util::EventLogger* events = util::StatLogger::instance().createLogger( name );
    LBASSERT( events );
    uint64_t blocksLoaded = 0;

    LBINFO << "async GPU fetcher initialized: " << getWindow() << std::endl;
    LBINFO << "=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+"
           << std::endl;

    lunchbox::Clock clock;
    while( true )
    {
        // try to read a new loading request
        bool sleepWait = true;

        GPULoadRequest loadRequestTmp;
        if( _loadRequests.getFront( loadRequestTmp ))
        {
            // LBWARN << "GPUAsyncLoader: trying to load request" << std::endl;
            sleepWait = false;
            _loadResponds.push( GPULoadRespond( loadRequestTmp,
                                                GPULoadStatus::STARTED ));

            GPULoadRequest loadRequest;
            bool loadingFailed = true;
            if( _decompressor && _ramPool && _loadRequests.tryPop( loadRequest ) &&
                loadRequest == loadRequestTmp )
            {
                // load / request new texture data from the RAM pool
                const RAMDataElement* dataEl =
                    _ramPool->getData( loadRequest.nodeId, _dataVersion,
                                       loadRequest.reload );

                bool sizePassed = true;
                if( dataEl && dataEl->size() < _compressedBS )
                {
                    LBERROR << "Size of the data returned by the RAM pool is "
                            << "smaller than required by the GPU" << std::endl;
                    sizePassed = false;
                }

                if( dataEl && sizePassed )
                {
                    clock.reset();
                    // LBWARN << "GPUAsyncLoader: got positive feedback from RAM Pool" << std::endl;

                    const Box_i32 coords =
                        _cacheIndex->getBlockCoordinates( loadRequest.posOnGPU );
                    if( coords != Box_i32( ))
                    {
                        if( blocksLoaded == 0 )
                            LBWARN << "Loaded blocks to GPU: " << blocksLoaded
                                   << std::endl;

                        // LBWARN << loadRequest << std::endl;

                        // decompress into the PBO, then copy from the bound PBO
                        // into the 3D storage texture (data pointer is NULL, so
                        // glTexSubImage3D reads from the PBO)
                        _decompressor->load( dataEl, loadRequest );

                        const uint32_t blockDim = _cacheIndex->getBlockDim();
                        _pbo->bind();
                        EQ_GL_CALL( glBindTexture( GL_TEXTURE_3D, _storageTexture3D ));
                        EQ_GL_CALL( glTexSubImage3D( GL_TEXTURE_3D, 0,
                                        coords.s.x, coords.s.y, coords.s.z,
                                        blockDim, blockDim, blockDim,
                                        GL_ALPHA, _byteFormat3D, NULL ));
                        _pbo->unbind();
                        EQ_GL_CALL( glFinish( ));

                        _loadResponds.push( GPULoadRespond( loadRequest,
                                                    GPULoadStatus::FINISHED ));
                        loadingFailed = false;

                        const uint32_t compressedSize =
                            _ramPool->getDataHDDIO()->getBlockSize_( loadRequest.treePos );

                        const double timeD = clock.getTimed(); // elapsed time in ms
                        // convert to MB/s: data size in MB divided by time in seconds
                        const double speedC =
                            ( compressedSize / ( 1024.f * 1024.f )) / ( timeD / 1000.0 );
                        const double speedD =
                            (( blockDim * blockDim * blockDim * sizeof( float )) /
                             ( 1024.f * 1024.f )) / ( timeD / 1000.0 );
                        *events << "GBL (GPU_Block_Loaded) " << ( ++blocksLoaded )
                                << " in " << timeD << " ms, at " << speedC
                                << " MB/s (" << speedD << " MB/s)" << std::endl;

                        if( blocksLoaded % 100 == 0 )
                            LBWARN << "Loaded blocks to GPU: " << blocksLoaded
                                   << std::endl;
                    }
                }
            }
            if( loadingFailed )
                _loadResponds.push( GPULoadRespond( loadRequestTmp,
                                                    GPULoadStatus::FAILED ));
        }

        // check for commands
        if( _processCommands( sleepWait ))
            return;

        // if there were no commands, sleep a bit before the next check
        if( sleepWait )
        {
            lunchbox::sleep( 20 ); // time in ms
            if( _processCommands( sleepWait )) // check again for new commands
                return;
        }
    }
}
void GPUCacheManager::updateFront( const NodeIdPosVec& desiredIds )
{
    _gpuLoader->postCommand( GPUCommand::PAUSE );
    _gpuLoader->clearLoadRequests();

    _processGPULoaderResponces();

    _iteration++;

    // update GPU iteration stamps of currently used data and collect ids that are not loaded yet
    _newIds.clear();
    _newIds.reserve( desiredIds.size() );

    // protect the GPU slot that is being used for async loading
    if( _nodeIdBeingLoaded != 0 )
        _cacheValues[ _cachePosBeingLoaded ].iteration = _iteration;

    _requests.clear();
    for( size_t i = 0; i < desiredIds.size(); ++i )
    {
        const NodeIdPos& testNodeId = desiredIds[i];

        NodeIdHash::const_iterator nodeIdIterator = _usedElements.find( testNodeId.id );
        if( nodeIdIterator != _usedElements.end() )
        {
            const GpuLocation& nodeId = nodeIdIterator->second;
            const uint32_t cachePos = nodeId.posOnGpu;
            LBASSERT( cachePos < _cacheValues.size() );
            LBASSERT( _cacheValues[ cachePos ].nodeId == testNodeId.id );
            _cacheValues[ cachePos ].iteration = _iteration;
        }
        else
        {
            _newIds.push_back( desiredIds[i] );
        }
    }

    // allocate space on the GPU (find entries that are no longer used and can be evicted)
    _releaseIds.clear();
    _releaseIds.reserve( _newIds.size() );

    LBASSERT( _cacheValuesTmp.size() == _cacheValues.size() );
    memcpy( &_cacheValuesTmp[0], &_cacheValues[0],
            sizeof( GPUCacheValue ) * _cacheValues.size( ));
    qsort( &_cacheValuesTmp[0], _cacheValuesTmp.size(), sizeof( GPUCacheValue ),
           GPUCacheValueIterationCmp );

    uint32_t releaseCount = 0;
    while( releaseCount < _cacheValuesTmp.size() &&                 // stay within the cache size
           releaseCount < _newIds.size() &&                         // remove not more than necessary
           _cacheValuesTmp[ releaseCount ].iteration < _iteration ) // remove only older data
    {
        if( !_cacheValuesTmp[ releaseCount ].isFree() )
            _releaseIds.push_back( _cacheValuesTmp[ releaseCount ].nodeId );

        releaseCount++;
    }

    // remove unused ids
    for( size_t i = 0; i < _releaseIds.size(); ++i )
    {
        NodeIdHash::iterator nodeIdIterator = _usedElements.find( _releaseIds[i] );
        if( nodeIdIterator == _usedElements.end() )
        {
            LBERROR << "Element is not in the cache - not possible to release it"
                    << std::endl;
        }
        else
        {
            const uint32_t cachePos = nodeIdIterator->second.posOnGpu;
            LBASSERT( cachePos < _cacheValues.size( ));
            LBASSERT( _cacheValues[ cachePos ].nodeId == _releaseIds[i] );

            _cacheValues[ cachePos ].reset();
            _usedElements.erase( nodeIdIterator );
        }
    }

    // unlock data in RAM
    if( _releaseIds.size() > 0 )
        _gpuLoader->releaseRAMData( _releaseIds );

    // schedule new loads
    _requests.reserve( _requests.size() + releaseCount );
    LBASSERT( releaseCount <= _cacheValues.size() );
    LBASSERT( releaseCount <= _newIds.size() );
    for( size_t i = 0; i < releaseCount; ++i )
    {
        if( _newIds[i].id == _nodeIdBeingLoaded )
            continue;

        const uint32_t cachePos = _cacheValuesTmp[i].getPos();
        LBASSERT( _cacheValues[ cachePos ].isFree() );

        _requests.push_back( GPULoadRequest( _newIds[i].id, cachePos,
                                             _newIds[i].treePos, false ));
    }

    _gpuLoader->clearRAMLoadRequests();
    if( _requests.size() > 0 )
        _gpuLoader->postLoadRequestVec( _requests );

    _gpuLoader->postCommand( GPUCommand::RESUME );
}
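/*
 * Illustrative sketch (not part of the build): a possible per-frame call site
 * for updateFront(). The traversal call and the `renderer`/`cacheManager`
 * variables are hypothetical; only updateFront() and NodeIdPosVec come from
 * the code above.
 *
 *   NodeIdPosVec desiredIds;
 *   renderer->collectVisibleBlocks( frustum, desiredIds ); // hypothetical traversal
 *   cacheManager->updateFront( desiredIds ); // pause loader, evict stale blocks,
 *                                            // post load requests, resume loader
 *   // blocks finished by the async loader are picked up on the next
 *   // updateFront() call via _processGPULoaderResponces()
 */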