/**
 * Entry point of a device-backed path tracing render thread.
 *
 * Keeps PATH_DEVICE_RENDER_BUFFER_COUNT ray buffers circulating: every buffer
 * waiting in the local to-do queue is reset, filled by the path integrator
 * selected through the buffer's user data and pushed to the intersection
 * device. One traced buffer is then popped back from the device, its paths
 * are advanced and the buffer is queued again. The loop runs until an
 * interruption of the current thread is requested.
 *
 * @param renderThread thread object supplying the ray buffers, the path
 *                     integrators, the intersection device and the thread
 *                     index used in the log messages.
 */
void PathDeviceRenderThread::RenderThreadImpl(PathDeviceRenderThread *renderThread) {
	cerr << "[PathDeviceRenderThread::" << renderThread->threadIndex << "] Rendering thread started" << endl;

	// Every buffer starts out as "to be filled"
	std::deque<RayBuffer *> todoBuffers;
	for (size_t i = 0; i < PATH_DEVICE_RENDER_BUFFER_COUNT; i++)
		todoBuffers.push_back(renderThread->rayBuffers[i]);

	try {
		while (!boost::this_thread::interruption_requested()) {
			// Produce buffers to trace
			while (!todoBuffers.empty()) {
				RayBuffer *rayBuffer = todoBuffers.front();
				todoBuffers.pop_front();

				rayBuffer->Reset();
				renderThread->pathIntegrators[rayBuffer->GetUserData()]->FillRayBuffer(rayBuffer);
				renderThread->intersectionDevice->PushRayBuffer(rayBuffer);
			}

			// Consume one traced buffer and recycle it
			RayBuffer *rayBuffer = renderThread->intersectionDevice->PopRayBuffer();
			renderThread->pathIntegrators[rayBuffer->GetUserData()]->AdvancePaths(rayBuffer);
			todoBuffers.push_back(rayBuffer);
		}

		cerr << "[PathDeviceRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	} catch (const boost::thread_interrupted &) {
		// Caught by const reference: no copy of the exception object is made
		cerr << "[PathDeviceRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	}
#if !defined(LUXRAYS_DISABLE_OPENCL)
	catch (const cl::Error &err) {
		cerr << "[PathDeviceRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() << "(" << err.err() << ")" << endl;
	}
#endif
}
/**
 * Entry point of a native (CPU) path tracing render thread.
 *
 * Repeats a synchronous cycle on the thread's single ray buffer: reset it,
 * let the path integrator fill it with rays, intersect the rays on the
 * native device and let the integrator advance its paths with the results.
 * The loop runs until an interruption of the current thread is requested.
 *
 * @param renderThread thread object supplying the ray buffer, the path
 *                     integrator, the intersection device and the thread
 *                     index used in the log messages.
 */
void PathNativeRenderThread::RenderThreadImpl(PathNativeRenderThread *renderThread) {
	cerr << "[PathNativeRenderThread::" << renderThread->threadIndex << "] Rendering thread started" << endl;

	try {
		RayBuffer *rayBuffer = renderThread->rayBuffer;
		PathIntegrator *pathIntegrator = renderThread->pathIntegrator;
		NativeThreadIntersectionDevice *intersectionDevice = renderThread->intersectionDevice;

		while (!boost::this_thread::interruption_requested()) {
			rayBuffer->Reset();
			pathIntegrator->FillRayBuffer(rayBuffer);
			intersectionDevice->Intersect(rayBuffer);
			pathIntegrator->AdvancePaths(rayBuffer);
		}

		cerr << "[PathNativeRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	} catch (const boost::thread_interrupted &) {
		// Caught by const reference: no copy of the exception object is made
		cerr << "[PathNativeRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	}
#if !defined(LUXRAYS_DISABLE_OPENCL)
	catch (const cl::Error &err) {
		cerr << "[PathNativeRenderThread::" << renderThread->threadIndex << "] Rendering thread ERROR: " << err.what() << "(" << err.err() << ")" << endl;
	}
#endif
}
/**
 * Worker loop of the native-thread intersection device.
 *
 * Pops ray buffers from the to-do side of the queue (the external queue when
 * one is set, the device's own queue otherwise), intersects every ray with
 * the device data set and pushes the buffer to the done side. The time spent
 * waiting in PopToDo() is accumulated as idle time; the traced ray count and
 * the total time since the loop started are updated after each buffer. The
 * loop runs until an interruption of the current thread is requested.
 *
 * @param renderDevice device whose queue, data set and statistics are used.
 */
void NativeThreadIntersectionDevice::IntersectionThread(NativeThreadIntersectionDevice *renderDevice) {
	LR_LOG(renderDevice->deviceContext, "[NativeThread device::" << renderDevice->deviceName << "] Rendering thread started");

	try {
		RayBufferQueue *queue = renderDevice->externalRayBufferQueue ?
			renderDevice->externalRayBufferQueue : &(renderDevice->rayBufferQueue);

		const double startTime = WallClockTime();
		while (!boost::this_thread::interruption_requested()) {
			// Time spent waiting for work counts as device idle time
			const double t1 = WallClockTime();
			RayBuffer *rayBuffer = queue->PopToDo();
			renderDevice->statsDeviceIdleTime += WallClockTime() - t1;

			// Trace rays
			const Ray *rb = rayBuffer->GetRayBuffer();
			RayHit *hb = rayBuffer->GetHitBuffer();
			const size_t rayCount = rayBuffer->GetRayCount();
			// The index has the same type as the bound, so it cannot wrap
			// before reaching rayCount
			for (size_t i = 0; i < rayCount; ++i) {
				hb[i].SetMiss();
				renderDevice->dataSet->Intersect(&rb[i], &hb[i]);
			}
			renderDevice->statsTotalDataParallelRayCount += rayCount;

			queue->PushDone(rayBuffer);

			renderDevice->statsDeviceTotalTime = WallClockTime() - startTime;
		}

		LR_LOG(renderDevice->deviceContext, "[NativeThread device::" << renderDevice->deviceName << "] Rendering thread halted");
	} catch (const boost::thread_interrupted &) {
		// Caught by const reference: no copy of the exception object is made
		LR_LOG(renderDevice->deviceContext, "[NativeThread device::" << renderDevice->deviceName << "] Rendering thread halted");
	}
}
/**
 * Worker loop of the FPGA intersection device.
 *
 * Pops ray buffers from the to-do side of the device queue, marks every hit
 * slot as a miss, hands the whole ray and hit arrays to xrti_intersect() and
 * pushes the buffer to the done side. The time spent waiting in PopToDo() is
 * accumulated as idle time; the total time since the loop started and the
 * traced ray count are updated after each buffer. The loop runs until an
 * interruption of the current thread is requested.
 *
 * @param renderDevice device whose queue and statistics are used.
 * @throws std::runtime_error when xrti_intersect() returns a negative value.
 *         NOTE(review): this exception is not caught inside this function,
 *         so it leaves the thread function -- confirm that is intended.
 */
void FPGAIntersectionDevice::IntersectionThread(FPGAIntersectionDevice *renderDevice) {
	LR_LOG(renderDevice->deviceContext, "[FPGA device::" << renderDevice->deviceName << "] Rendering thread started");

	try {
		RayBufferQueue *queue = renderDevice->rayBufferQueue;

		const double startTime = WallClockTime();
		while (!boost::this_thread::interruption_requested()) {
			// Time spent waiting for work counts as device idle time
			const double t1 = WallClockTime();
			RayBuffer *rayBuffer = queue->PopToDo();
			renderDevice->statsDeviceIdleTime += WallClockTime() - t1;

			// Trace rays
			const Ray *rb = rayBuffer->GetRayBuffer();
			RayHit *hb = rayBuffer->GetHitBuffer();
			const size_t rayCount = rayBuffer->GetRayCount();
			// Initialise every hit as a miss before the hardware call; the
			// index has the same type as the bound, so it cannot wrap
			for (size_t i = 0; i < rayCount; ++i) {
				hb[i].SetMiss();
				//renderDevice->accel->Intersect(&rb[i], &hb[i]);
			}

			// Both casts yield a plain void *, exactly as the original
			// C-style casts did
			void *rayData = const_cast<void *>(static_cast<const void *>(rb));
			void *hitData = static_cast<void *>(hb);
			if (xrti_intersect(rayData, rayCount*sizeof(Ray), rayCount, hitData, rayCount*sizeof(RayHit)) < 0) {
				throw std::runtime_error("xrti_intersect failed");
			}

			renderDevice->statsDeviceTotalTime = WallClockTime() - startTime;
			renderDevice->statsTotalDataParallelRayCount += rayCount;
			queue->PushDone(rayBuffer);

			// Disabled debug dump of the raw ray and hit bytes as hex text:
			// FILE *f = fopen("/mnt/scratch/sam/ray_tracer/tb_generator/data/rays.txt", "w");
			// unsigned char *b = (unsigned char *)rb;
			// for (u_int i = 0; i < rayCount * sizeof(luxrays::Ray); i++) {
			// 	fprintf(f, "%02x", b[i]);
			// 	if ((i + 1) % 48 == 0)
			// 		fprintf(f, "\n");
			// }
			// fclose(f);
			// f = fopen("/mnt/scratch/sam/ray_tracer/tb_generator/data/hits.txt", "w");
			// b = (unsigned char *)hb;
			// for (u_int i = 0; i < rayCount * sizeof(luxrays::RayHit); i++) {
			// 	fprintf(f, "%02x", b[i]);
			// 	if ((i + 1) % 20 == 0)
			// 		fprintf(f, "\n");
			// }
			// fclose(f);
		}

		LR_LOG(renderDevice->deviceContext, "[FPGA device::" << renderDevice->deviceName << "] Rendering thread halted");
	} catch (const boost::thread_interrupted &) {
		// Caught by const reference: no copy of the exception object is made
		LR_LOG(renderDevice->deviceContext, "[FPGA device::" << renderDevice->deviceName << "] Rendering thread halted");
	}
}
/**
 * Traces every slot of a ray buffer through the hierarchy.
 *
 * For each slot the ray is copied, its result is cleared, the treelet
 * tracking state (currentTreelet, uniqueTreelets) is reset and
 * traceRecursive() is started from the root node.
 *
 * @param rays  buffer providing the rays and receiving the results.
 * @param stats optional statistics record; when non-null its platform is set
 *              and its ray counter is incremented once per traced ray.
 */
void BVH::trace(RayBuffer& rays, RayStats* stats) const
{
    for (S32 slot = 0; slot < rays.getSize(); ++slot)
    {
        // Work on a private copy of the ray; the result is written in place.
        Ray slotRay = rays.getRayForSlot(slot);
        RayResult& slotResult = rays.getMutableResultForSlot(slot);
        slotResult.clear();

        // Reset the per-ray treelet bookkeeping.
        currentTreelet = -2;
        uniqueTreelets.clear();

        if (stats)
        {
            stats->platform = m_platform;
            stats->numRays++;
        }

        traceRecursive(m_root, slotRay, slotResult, rays.getNeedClosestHit(), stats);
    }
}
/**
 * Entry point of a native (CPU) render thread.
 *
 * Repeats a synchronous cycle on the thread's single ray buffer: reset it,
 * let the path integrator fill it with rays, trace the rays on the native
 * intersection device and let the integrator advance its paths with the
 * results. The loop runs until an interruption of the current thread is
 * requested.
 *
 * @param renderThread thread object supplying the ray buffer, the path
 *                     integrator, the intersection device and the thread
 *                     index used in the log messages.
 */
void NativeRenderThread::RenderThreadImpl(NativeRenderThread *renderThread) {
	cerr << "[NativeRenderThread::" << renderThread->threadIndex << "] Rendering thread started" << endl;

	try {
		RayBuffer *rayBuffer = renderThread->rayBuffer;
		PathIntegrator *pathIntegrator = renderThread->pathIntegrator;
		NativeIntersectionDevice *intersectionDevice = renderThread->intersectionDevice;

		while (!boost::this_thread::interruption_requested()) {
			rayBuffer->Reset();
			pathIntegrator->FillRayBuffer(rayBuffer);
			intersectionDevice->TraceRays(rayBuffer);
			pathIntegrator->AdvancePaths(rayBuffer);
		}

		cerr << "[NativeRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	} catch (const boost::thread_interrupted &) {
		// Caught by const reference: no copy of the exception object is made
		cerr << "[NativeRenderThread::" << renderThread->threadIndex << "] Rendering thread halted" << endl;
	} catch (const cl::Error &err) {
		cerr << "[NativeRenderThread::" << renderThread->threadIndex << "] RenderingERROR: " << err.what() << "(" << err.err() << ")" << endl;
	}
}
void drawTestScene() { bMap->clear(); cam.buildRays(*bMap,buffer,lights); //cam.buildCentralRay(bMap,&buffer,lights); RayLink *t; t = buffer.start->next; while(buffer.size > 0) { buffer.size--; t->task->execute(sceneTree); t = t->next; //delete t->task; /* t = buffer.getFront(); t->task->execute(sceneTree); buffer.deleteFront(); */ countRayExecuted++; } /* unsigned char r, g, b; for(int i = 0; i < IMAGE_HEIGHT; i++) for(int j = 0; j < IMAGE_WIDTH; j++) { r = ((i/3)%15 <= 2) ? 0 : 255; g = ((j/3)%15 <= 2) ? 0 : 255; b = ((i/3)%15 < (j/3)%15) ? 0 : 255; bMap->setBMapPixel(i, j, r, g, b); } */ printf("drawn\n"); buffer.clear(); }
void HybridRenderThread::RenderFunc() { //SLG_LOG("[HybridRenderThread::" << threadIndex << "] Rendering thread started"); boost::this_thread::disable_interruption di; Film *film = threadFilm; const u_int filmWidth = film->GetWidth(); const u_int filmHeight = film->GetHeight(); pixelCount = filmWidth * filmHeight; RandomGenerator *rndGen = new RandomGenerator(threadIndex + renderEngine->seedBase); const u_int incrementStep = 4096; vector<HybridRenderState *> states(incrementStep); try { // Initialize the first states for (u_int i = 0; i < states.size(); ++i) states[i] = AllocRenderState(rndGen); u_int generateIndex = 0; u_int collectIndex = 0; while (!boost::this_thread::interruption_requested()) { // Generate new rays up to the point to have 3 pending buffers while (pendingRayBuffers < 3) { states[generateIndex]->GenerateRays(this); generateIndex = (generateIndex + 1) % states.size(); if (generateIndex == collectIndex) { //SLG_LOG("[HybridRenderThread::" << threadIndex << "] Increasing states size by " << incrementStep); //SLG_LOG("[HybridRenderThread::" << threadIndex << "] State size: " << states.size()); // Insert a set of new states and continue states.insert(states.begin() + generateIndex, incrementStep, NULL); for (u_int i = generateIndex; i < generateIndex + incrementStep; ++i) states[i] = AllocRenderState(rndGen); collectIndex += incrementStep; } } //SLG_LOG("[HybridRenderThread::" << threadIndex << "] State size: " << states.size()); //SLG_LOG("[HybridRenderThread::" << threadIndex << "] generateIndex: " << generateIndex); //SLG_LOG("[HybridRenderThread::" << threadIndex << "] collectIndex: " << collectIndex); //SLG_LOG("[HybridRenderThread::" << threadIndex << "] pendingRayBuffers: " << pendingRayBuffers); // Collect rays up to the point to have only 1 pending buffer while (pendingRayBuffers > 1) { samplesCount += states[collectIndex]->CollectResults(this); const u_int newCollectIndex = (collectIndex + 1) % states.size(); // A safety-check, it should 
never happen if (newCollectIndex == generateIndex) break; collectIndex = newCollectIndex; } } //SLG_LOG("[HybridRenderThread::" << threadIndex << "] Rendering thread halted"); } catch (boost::thread_interrupted) { SLG_LOG("[HybridRenderThread::" << threadIndex << "] Rendering thread halted"); } #ifndef LUXRAYS_DISABLE_OPENCL catch (cl::Error err) { SLG_LOG("[HybridRenderThread::" << threadIndex << "] Rendering thread ERROR: " << err.what() << "(" << luxrays::utils::oclErrorString(err.err()) << ")"); } #endif // Clean current ray buffers if (currentRayBufferToSend) { currentRayBufferToSend->Reset(); freeRayBuffers.push_back(currentRayBufferToSend); currentRayBufferToSend = NULL; } if (currentReiceivedRayBuffer) { currentReiceivedRayBuffer->Reset(); freeRayBuffers.push_back(currentReiceivedRayBuffer); currentReiceivedRayBuffer = NULL; } // Free all states for (u_int i = 0; i < states.size(); ++i) delete states[i]; delete rndGen; // Remove all pending ray buffers while (pendingRayBuffers > 0) { RayBuffer *rayBuffer = device->PopRayBuffer(); --(pendingRayBuffers); rayBuffer->Reset(); freeRayBuffers.push_back(rayBuffer); } }
/**
 * Traces one batch of rays with the CUDA "trace" kernel.
 *
 * Validates the BVH, binds the node / triangle / index buffers and the ray
 * and result buffers as kernel parameters and texture references, picks the
 * launch dimensions from the kernel configuration and launches the kernel.
 *
 * @param rays batch to trace; results are written to its result buffer.
 * @return the value returned by CudaKernel::launchTimed(), or 0.0f when the
 *         batch is empty.
 */
F32 CudaTracer::traceBatch(RayBuffer& rays)
{
    // An empty batch needs no launch.
    int numRays = rays.getSize();
    if (numRays == 0)
        return 0.0f;

    // The BVH must exist and use the layout this tracer expects.
    if (!m_bvh)
        fail("CudaTracer: No BVH!");
    if (m_bvh->getLayout() != getDesiredBVHLayout())
        fail("CudaTracer: Incorrect BVH layout!");

    // Base pointers and sub-array ranges of the BVH data
    // (.x is used as the offset, .y as the size passed to setTexRef).
    CUdeviceptr nodeBase   = m_bvh->getNodeBuffer().getCudaPtr();
    CUdeviceptr triBase    = m_bvh->getTriWoopBuffer().getCudaPtr();
    Buffer&     triIndices = m_bvh->getTriIndexBuffer();

    Vec2i nodesA = m_bvh->getNodeSubArray(0);
    Vec2i nodesB = m_bvh->getNodeSubArray(1);
    Vec2i nodesC = m_bvh->getNodeSubArray(2);
    Vec2i nodesD = m_bvh->getNodeSubArray(3);
    Vec2i trisA  = m_bvh->getTriWoopSubArray(0);
    Vec2i trisB  = m_bvh->getTriWoopSubArray(1);
    Vec2i trisC  = m_bvh->getTriWoopSubArray(2);

    // Compile (or fetch) the module and look up the entry point.
    CudaModule* cudaModule = compileKernel();
    CudaKernel kernel = cudaModule->getKernel("trace");

    // Kernel parameters.
    kernel.setParams(
        numRays,                                    // numRays
        rays.getNeedClosestHit() ? 0 : 1,           // anyHit
        rays.getRayBuffer().getCudaPtr(),           // rays
        rays.getResultBuffer().getMutableCudaPtr(), // results
        nodeBase + nodesA.x,                        // nodesA
        nodeBase + nodesB.x,                        // nodesB
        nodeBase + nodesC.x,                        // nodesC
        nodeBase + nodesD.x,                        // nodesD
        triBase + trisA.x,                          // trisA
        triBase + trisB.x,                          // trisB
        triBase + trisC.x,                          // trisC
        triIndices.getCudaPtr());                   // triIndices

    // Texture references.
    cudaModule->setTexRef("t_rays", rays.getRayBuffer(), CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_nodesA", nodeBase + nodesA.x, nodesA.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_nodesB", nodeBase + nodesB.x, nodesB.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_nodesC", nodeBase + nodesC.x, nodesC.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_nodesD", nodeBase + nodesD.x, nodesD.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_trisA", triBase + trisA.x, trisA.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_trisB", triBase + trisB.x, trisB.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_trisC", triBase + trisC.x, trisC.y, CU_AD_FORMAT_FLOAT, 4);
    cudaModule->setTexRef("t_triIndices", triIndices, CU_AD_FORMAT_SIGNED_INT32, 1);

    // Number of warps to launch: one per 32 rays, or a fixed count when the
    // kernel uses persistent threads (the shared warp counter is reset first).
    int warpsWanted;
    if (m_kernelConfig.usePersistentThreads != 0)
    {
        *(S32*)cudaModule->getGlobal("g_warpCounter").getMutablePtr() = 0;
        warpsWanted = 720; // Tesla: 30 SMs * 24 warps, Fermi: 15 SMs * 48 warps
    }
    else
    {
        warpsWanted = (numRays + 31) / 32;
    }

    // Block and grid sizes.
    Vec2i blockSize(m_kernelConfig.blockWidth, m_kernelConfig.blockHeight);
    int threadsPerBlock = blockSize.x * blockSize.y;
    int warpsPerBlock   = (threadsPerBlock + 31) / 32;
    int blockCount      = (warpsWanted + warpsPerBlock - 1) / warpsPerBlock;

    // Launch.
    return kernel.launchTimed(blockCount * threadsPerBlock, blockSize);
}