/// Builds every GPU resource the demo uses: three screen-sized render targets,
/// the compute kernels, the light/tile/output buffers and the geometry-pass
/// shaders.
///
/// @param deviceData  Device handed to INITIALIZE_DEVICE_DATA; afterwards the
///                    demo refers to it through m_deviceData.
SimpleDeferredDemo::SimpleDeferredDemo(const Device* deviceData)
	: Demo()
{
	//	m_enablePostEffect = 1;
	INITIALIZE_DEVICE_DATA( deviceData );

	//	Screen-sized render targets: colour, position, normal.
	DeviceRenderTargetDX11::createRenderTarget( m_deviceData, g_wWidth, g_wHeight, m_colorRT );
	DeviceRenderTargetDX11::createRenderTarget( m_deviceData, g_wWidth, g_wHeight, m_posRT );
	DeviceRenderTargetDX11::createRenderTarget( m_deviceData, g_wWidth, g_wHeight, m_normalRT );

	{	//	Compile the kernel file once and pull the three entry points out of it.
		const char* includeOption = "-I ..\\";
		KernelBuilder<(DeviceType)MyDeviceType> kernelBuilder;
		kernelBuilder.setFromFile( m_deviceData, "GDemos\\SimpleDeferredDemoKernel", includeOption, true );
		kernelBuilder.createKernel( "PostProcessKernel", m_kernel );
		kernelBuilder.createKernel( "ClearLightIdxKernel", m_clearLightIdxKernel );
		kernelBuilder.createKernel( "BuildLightIdxKernel", m_buildLightIdxKernel );
	}

	//	One output element per pixel, plus one position/colour slot per light.
	m_buffer.allocate( m_deviceData, g_wWidth*g_wHeight );
	m_lightPosBuffer.allocate( m_deviceData, MAX_LIGHTS );
	m_lightColorBuffer.allocate( m_deviceData, MAX_LIGHTS );

	{	//	Per-tile light storage: MAX_LIGHTS_PER_TILE_IN_32B words for every tile.
		ADLASSERT( MAX_LIGHTS_PER_TILE%32 == 0 );
		const int tilesX = calcNumTiles( g_wWidth, CLUSTER_SIZE );
		const int tilesY = calcNumTiles( g_wHeight, CLUSTER_SIZE );
		m_tileBuffer.allocate( m_deviceData, MAX_LIGHTS_PER_TILE_IN_32B*tilesX*tilesY );
	}

	{	//	Geometry-pass shaders; the layout describes position/normal/colour/uv vertices.
		D3D11_INPUT_ELEMENT_DESC vertexLayout[] =
		{
			{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT,    0,  0, D3D11_INPUT_PER_VERTEX_DATA, 0 },
			{ "NORMAL",   0, DXGI_FORMAT_R32G32B32_FLOAT,    0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0 },
			{ "COLOR",    0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0 },
			{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT,       0, 40, D3D11_INPUT_PER_VERTEX_DATA, 0 },
		};

		ShaderUtilsDX11 shaderBuilder( m_deviceData, "GDemos\\Deferred.hlsl" );
		shaderBuilder.createVertexShader( "DeferredGPVS", m_gpVShader, ARRAYSIZE( vertexLayout ), vertexLayout );
		shaderBuilder.createPixelShader( "DeferredGPPS", m_gpPShader );
	}

	//	Remember the global default shaders; renderPost() writes them back.
	m_gVShader = g_defaultVertexShader;
	m_gPShader = g_defaultPixelShader;
}
void ObjLoader::readFace(char* buf) { char tmp[128]; int nOfSlash = strchrcount(buf, '/'); if( nOfSlash == 0 ) { int4 face; sscanf(buf,"%s %d %d %d", tmp,&face.s[0], &face.s[1], &face.s[2]); face.s[0]--;face.s[1]--;face.s[2]--; m_faces.pushBack(face); } else if( nOfSlash == 3) { int4 face; int4 faceNormal; sscanf(buf,"%s %d/%d %d/%d %d/%d",tmp, &face.s[0], &faceNormal.s[0], &face.s[1], &faceNormal.s[1], &face.s[2], &faceNormal.s[2]); face.s[0]--;face.s[1]--;face.s[2]--; faceNormal.s[0]--; faceNormal.s[1]--; faceNormal.s[2]--; m_faces.pushBack(face); m_faceNormals.pushBack(faceNormal); } else if( nOfSlash == 6) { int4 face; int4 faceNormal; sscanf(buf,"%s %d//%d %d//%d %d//%d",tmp, &face.s[0], &faceNormal.s[0], &face.s[1], &faceNormal.s[1], &face.s[2], &faceNormal.s[2]); face.s[0]--;face.s[1]--;face.s[2]--; faceNormal.s[0]--; faceNormal.s[1]--; faceNormal.s[2]--; m_faces.pushBack(face); m_faceNormals.pushBack(faceNormal); } else { ADLASSERT( 0 ); } }
/// Loads a model by feeding the file to decode() one line at a time, then
/// prints the triangle count.
///
/// If the file cannot be opened (or filename is NULL) the error is reported,
/// ADLASSERT fires, and the constructor returns without reading anything; this
/// keeps builds where the assertion does not abort from passing a null FILE*
/// to fgets()/fclose().
///
/// @param filename  Path of the file to read, opened in text mode.
ObjLoader::ObjLoader(char *filename)
{
	FILE* f = filename ? std::fopen(filename, "r") : NULL;
	if( !f )
	{
		printf("File is not exist");
		ADLASSERT( 0 );
		return;
	}

	// Lines longer than the buffer are handed to decode() in several pieces.
	char buf[4096];
	while( fgets(buf, sizeof(buf), f) )
	{
		decode(buf);
	}
	fclose(f);

	printf("%s : %3.2fK triangles\n", filename, m_faces.getSize()/1000.f);
}
void SimpleDeferredDemo::renderPost() { // if(1) return; ADLASSERT( TILE_SIZE <= 16 ); int nClusterX = calcNumTiles(g_wWidth, CLUSTER_SIZE);//max2( 1, (g_wWidth/CLUSTER_SIZE)+(!(g_wWidth%CLUSTER_SIZE)?0:1) ); int nClusterY = calcNumTiles(g_wHeight, CLUSTER_SIZE);//max2( 1, (g_wHeight/CLUSTER_SIZE)+(!(g_wHeight%CLUSTER_SIZE)?0:1) ); // todo. define own constant buffer ConstantBuffer cb; { cb.m_world = XMMatrixIdentity(); cb.m_view = g_ViewTr; cb.m_projection = g_ProjectionTr; } { // clear lightIdxBuffer BufferInfo bInfo[] = { BufferInfo( &m_tileBuffer ) }; Launcher launcher( m_deviceData, &m_clearLightIdxKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); // launcher.pushBackRW( m_tileBuffer ); launcher.launch1D( nClusterX*nClusterY*MAX_LIGHTS_PER_TILE_IN_32B ); } { // set lightIdxBuffer BufferInfo bInfo[] = { BufferInfo( &m_lightPosBuffer, true ), BufferInfo( &m_lightColorBuffer, true ), BufferInfo( &m_tileBuffer ) }; Launcher launcher( m_deviceData, &m_buildLightIdxKernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); // launcher.pushBackR( m_lightPosBuffer ); // launcher.pushBackR( m_lightColorBuffer ); // launcher.pushBackRW( m_tileBuffer ); launcher.setConst( g_constBuffer, cb ); launcher.launch1D( 64 ); } Stopwatch dsw( m_deviceData ); dsw.start(); { // run CS BufferDX11<int> cBuffer; cBuffer.m_srv = m_colorRT.m_srv; BufferDX11<int> pBuffer; pBuffer.m_srv = m_posRT.m_srv; BufferDX11<int> nBuffer; nBuffer.m_srv = m_normalRT.m_srv; BufferInfo bInfo[] = { BufferInfo( &m_lightPosBuffer, true ), BufferInfo( &m_lightColorBuffer, true ), BufferInfo( &cBuffer, true ), BufferInfo( &pBuffer, true ), BufferInfo( &nBuffer, true ), BufferInfo( &m_tileBuffer, true ), BufferInfo( &m_buffer ) }; Launcher launcher( m_deviceData, &m_kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); // launcher.pushBackR( m_lightPosBuffer ); // launcher.pushBackR( m_lightColorBuffer ); // 
launcher.pushBackR( cBuffer ); // launcher.pushBackR( pBuffer ); // launcher.pushBackR( nBuffer ); // launcher.pushBackR( m_tileBuffer ); // launcher.pushBackRW( m_buffer ); launcher.setConst( g_constBuffer, cb ); launcher.launch2D( nClusterX*TILE_SIZE, nClusterY*TILE_SIZE, TILE_SIZE, TILE_SIZE ); } dsw.stop(); { m_nTxtLines = 0; sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "%3.3fms", dsw.getMs()); } // DeviceDX11* dd = (DeviceDX11*)m_deviceData; ID3D11RenderTargetView* pOrigRTV = NULL; ID3D11DepthStencilView* pOrigDSV = NULL; dd->m_context->OMGetRenderTargets( 1, &pOrigRTV, &pOrigDSV ); // release for the renderPre pOrigRTV->Release(); pOrigDSV->Release(); { float ClearColor[4] = { 0.f, 0.f, 0.f, 1.0f }; ID3D11RenderTargetView* aRTViews[ 1 ] = { pOrigRTV }; dd->m_context->OMSetRenderTargets( 1, aRTViews, pOrigDSV ); dd->m_context->ClearDepthStencilView( pOrigDSV, D3D11_CLEAR_DEPTH, 1.0f, 0 ); dd->m_context->ClearRenderTargetView( pOrigRTV, ClearColor ); dd->m_context->PSSetShaderResources( 0, 1, ((BufferDX11<float4>*)&m_buffer)->getSRVPtr() ); // render to screen renderFullQuad( m_deviceData, &g_bufferToRTPixelShader, make_float4((float)g_wWidth, (float)g_wHeight, 0,0 ) ); ID3D11ShaderResourceView* ppSRVNULL[16] = { 0 }; dd->m_context->PSSetShaderResources( 0, 1, ppSRVNULL ); } pOrigRTV->Release(); pOrigDSV->Release(); g_defaultVertexShader = m_gVShader; g_defaultPixelShader = m_gPShader; }
/// Runs narrow-phase contact generation for the given broadphase pairs and then
/// the contact constraint solver.
///
/// Stages, in order:
///   1. cull the broadphase pairs (writes numPairsOut),
///   2. plane-versus-convex contacts, if m_planeBodyIndex >= 0,
///   3. convex-versus-convex contacts,
///   4. batch the contacts (set cell index, radix sort, bound search + scan,
///      reorder, copy back, batchContacts),
///   5. convert contacts to constraints and solve them.
/// Returns early when the internal data is missing, when there are no
/// accelerated rigid bodies, or when no contacts were generated.
///
/// numPairsOut and gpuBatchContacts are not declared in this function; they are
/// expected to be defined elsewhere in the translation unit or class.
///
/// @param broadphasePairs     OpenCL buffer holding the broadphase pairs.
/// @param numBroadphasePairs  Number of pairs in broadphasePairs.
void btGpuNarrowphaseAndSolver::computeContactsAndSolver(cl_mem broadphasePairs, int numBroadphasePairs)
{
	BT_PROFILE("computeContactsAndSolver");

	// Every step below dereferences m_internalData, so bail out if it is missing.
	if (!m_internalData)
		return;

	int maxBodyIndex = m_internalData->m_numAcceleratedRigidBodies;
	if (!maxBodyIndex)
		return;

	ChNarrowphaseBase::Config cfgNP;
	cfgNP.m_collisionMargin = 0.01f;
	int nContactOut = 0;
	//printf("convexPairsOut.m_size = %d\n",m_internalData->m_convexPairsOutGPU->m_size);

	// Wrap the caller's cl_mem so it can be passed where a btOpenCLArray is expected.
	btOpenCLArray<int2> broadphasePairsGPU(m_context,m_queue);
	broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs);

	bool useCulling = true;
	if (useCulling)
	{
		BT_PROFILE("ChNarrowphase::culling");
		clFinish(m_queue);

		numPairsOut = m_internalData->m_narrowPhase->culling(
			&broadphasePairsGPU,
			numBroadphasePairs,
			m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
			m_internalData->m_convexPairsOutGPU,
			cfgNP);
	}

	{
		if (m_planeBodyIndex>=0)
		{
			BT_PROFILE("ChNarrowphase:: plane versus convex");
			//todo: get rid of this dynamic allocation
			// Sized for every body rather than bodies-1: if m_planeBodyIndex is not a
			// valid body index the loop below emits one pair per body, and the smaller
			// allocation would be overrun before the assert can report it.
			int2* hostPairs = new int2[maxBodyIndex];
			int index=0;
			for (int i=0;i<maxBodyIndex;i++)
			{
				if (i!=m_planeBodyIndex)
				{
					hostPairs[index].x = m_planeBodyIndex;
					hostPairs[index].y = i;
					index++;
				}
			}
			assert(m_internalData->m_numAcceleratedRigidBodies-1 == index);
			m_internalData->m_planePairs->copyFromHostPointer(hostPairs,index);
			clFinish(m_queue);

			delete[]hostPairs;
			//convex versus plane
			m_internalData->m_narrowPhase->execute(m_internalData->m_planePairs, index, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer,
				0,0,m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
		}
	}

	{
		BT_PROFILE("ChNarrowphase::execute");
		if (useCulling)
		{
			//convex versus convex
			//m_internalData->m_narrowPhase->execute(m_internalData->m_convexPairsOutGPU,numPairsOut, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
#define USE_CONVEX_CONVEX_HOST 1
#ifdef USE_CONVEX_CONVEX_HOST
			m_internalData->m_convexPairsOutGPU->resize(numPairsOut);
			m_internalData->m_pBufContactOutGPU->resize(nContactOut);

			m_internalData->m_gpuSatCollision->computeConvexConvexContactsHost(
				m_internalData->m_convexPairsOutGPU,
				numPairsOut,
				m_internalData->m_bodyBufferGPU,
				m_internalData->m_ShapeBuffer,
				m_internalData->m_pBufContactOutGPU,
				nContactOut, cfgNP, m_internalData->m_convexPolyhedra,m_internalData->m_convexVertices,m_internalData->m_uniqueEdges,
				m_internalData->m_convexFaces,m_internalData->m_convexIndices);
#else
			m_internalData->m_narrowPhase->execute(
				m_internalData->m_convexPairsOutGPU,
				numPairsOut,
				m_internalData->m_bodyBufferGPU,
				m_internalData->m_ShapeBuffer,
				m_internalData->m_pBufContactOutGPU,
				nContactOut, cfgNP);
#endif
		} else
		{
			m_internalData->m_narrowPhase->execute(&broadphasePairsGPU, numBroadphasePairs, m_internalData->m_bodyBufferGPU, m_internalData->m_ShapeBuffer, m_internalData->m_pBufContactOutGPU, nContactOut, cfgNP);
		}

		clFinish(m_queue);
	}

	// Nothing to solve.
	if (!nContactOut)
		return;

	bool useSolver = true;//true;//false;

	if (useSolver)
	{
		float dt=1./60.;
		SolverBase::ConstraintCfg csCfg( dt );
		csCfg.m_enableParallelSolve = true;
		csCfg.m_averageExtent = 0.2f;//@TODO m_averageObjExtent;
		csCfg.m_staticIdx = m_planeBodyIndex;

		btOpenCLArray<Contact4>* contactsIn = m_internalData->m_pBufContactOutGPU;
		const btOpenCLArray<RigidBodyBase::Body>* bodyBuf = m_internalData->m_bodyBufferGPU;
		void* additionalData = m_internalData->m_frictionCGPU;
		const btOpenCLArray<RigidBodyBase::Inertia>* shapeBuf = m_internalData->m_inertiaBufferGPU;
		SolverData contactCOut = m_internalData->m_contactCGPU;
		int nContacts = nContactOut;

		{
			BT_PROFILE("GPU batch");

			{
				//@todo: just reserve it, without copy of original contact (unless we use warmstarting)
				// Make sure the solver's scratch contact buffer exists and holds nContacts entries.
				if( m_internalData->m_solverGPU->m_contactBuffer )
				{
					m_internalData->m_solverGPU->m_contactBuffer->resize(nContacts);
				}
				else
				{
					m_internalData->m_solverGPU->m_contactBuffer = new btOpenCLArray<Contact4>(m_context,m_queue, nContacts );
					m_internalData->m_solverGPU->m_contactBuffer->resize(nContacts);
				}

				btOpenCLArray<Contact4>* contactNative = contactsIn;

				{
					//btOpenCLArray<RigidBodyBase::Body>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
					//btOpenCLArray<Contact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );

					const int sortAlignment = 512; // todo. get this out of sort
					if( csCfg.m_enableParallelSolve )
					{
						int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment );

						btOpenCLArray<u32>* countsNative = m_internalData->m_solverGPU->m_numConstraints;
						btOpenCLArray<u32>* offsetsNative = m_internalData->m_solverGPU->m_offsets;

						{	//	2. set cell idx
							BT_PROFILE("GPU set cell idx");
							struct CB
							{
								int m_nContacts;
								int m_staticIdx;
								float m_scale;
								int m_nSplit;
							};

							ADLASSERT( sortSize%64 == 0 );
							CB cdata;
							cdata.m_nContacts = nContacts;
							cdata.m_staticIdx = csCfg.m_staticIdx;
							cdata.m_scale = 1.f/(BT_SOLVER_N_OBJ_PER_SPLIT*csCfg.m_averageExtent);
							cdata.m_nSplit = BT_SOLVER_N_SPLIT;

							m_internalData->m_solverGPU->m_sortDataBuffer->resize(nContacts);

							btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( bodyBuf->getBufferCL()), btBufferInfoCL( m_internalData->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
							btLauncherCL launcher(m_queue, m_internalData->m_solverGPU->m_setSortDataKernel );
							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
							launcher.setConst( cdata );
							// Launched over the padded size; the sort data buffer itself holds nContacts entries.
							launcher.launch1D( sortSize, 64 );
						}

						bool gpuRadixSort=true;
						if (gpuRadixSort)
						{	//	3. sort by cell idx
							BT_PROFILE("gpuRadixSort");
							//adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize );
							//adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize );
							btOpenCLArray<btSortData>& keyValuesInOut = *(m_internalData->m_solverGPU->m_sortDataBuffer);
							this->m_internalData->m_solverGPU->m_sort32->execute(keyValuesInOut);

							/*btAlignedObjectArray<btSortData> hostValues;
							keyValuesInOut.copyToHost(hostValues);
							printf("hostValues.size=%d\n",hostValues.size());
							*/
						}

						{	//	4. find entries
							BT_PROFILE("gpuBoundSearch");

							m_internalData->m_solverGPU->m_search->execute(*m_internalData->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,
								BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT,btBoundSearchCL::COUNT);

							//adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative,
							//	BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT );

							//unsigned int sum;
							m_internalData->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);//,&sum );
							//printf("sum = %d\n",sum);
						}

						{	//	5. sort constraints by cellIdx
							{
								BT_PROFILE("gpu m_reorderContactKernel");

								btInt4 cdata;
								cdata.x = nContacts;

								btBufferInfoCL bInfo[] = { btBufferInfoCL( contactNative->getBufferCL() ), btBufferInfoCL( m_internalData->m_solverGPU->m_contactBuffer->getBufferCL())
									, btBufferInfoCL( m_internalData->m_solverGPU->m_sortDataBuffer->getBufferCL()) };
								btLauncherCL launcher(m_queue,m_internalData->m_solverGPU->m_reorderContactKernel);
								launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
								launcher.setConst( cdata );
								launcher.launch1D( nContacts, 64 );
							}
						}
					}
				}

				clFinish(m_queue);

				{
					// Copy from the solver's contact buffer into the narrow-phase contact buffer.
					BT_PROFILE("gpu m_copyConstraintKernel");

					btInt4 cdata;
					cdata.x = nContacts;
					btBufferInfoCL bInfo[] = { btBufferInfoCL( m_internalData->m_solverGPU->m_contactBuffer->getBufferCL() ), btBufferInfoCL( contactNative->getBufferCL() ) };
					btLauncherCL launcher(m_queue, m_internalData->m_solverGPU->m_copyConstraintKernel );
					launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(btBufferInfoCL) );
					launcher.setConst( cdata );
					launcher.launch1D( nContacts, 64 );
					clFinish(m_queue);
				}

				if (gpuBatchContacts)
				{
					BT_PROFILE("gpu batchContacts");
					m_internalData->m_solverGPU->batchContacts( contactNative, nContacts, m_internalData->m_solverGPU->m_numConstraints, m_internalData->m_solverGPU->m_offsets, csCfg.m_staticIdx );
				}

				if (1)
				{
					BT_PROFILE("gpu convertToConstraints");
					m_internalData->m_solverGPU->convertToConstraints( bodyBuf, shapeBuf, contactNative, contactCOut, additionalData, nContacts, csCfg );
					clFinish(m_queue);
				}
			}
		}

		if (1)
		{
			BT_PROFILE("GPU solveContactConstraint");
			m_internalData->m_solverGPU->m_nIterations = 4;//10
			m_internalData->m_solverGPU->solveContactConstraint(m_internalData->m_bodyBufferGPU,
				m_internalData->m_inertiaBufferGPU,
				m_internalData->m_contactCGPU,
				0,
				nContactOut );

			clFinish(m_queue);
		}

#if 0
		if (0)
		{
			BT_PROFILE("read body velocities back to CPU");
			//read body updated linear/angular velocities back to CPU
			int numOfConvexRBodies = maxBodyIndex;
			m_internalData->m_bodyBufferGPU->read(
				m_internalData->m_bodyBufferCPU->m_ptr,numOfConvexRBodies);
			adl::DeviceUtils::waitForCompletion( m_internalData->m_deviceCL );
		}
#endif
	}
}
void AdlPrimitivesDemo::test( Buffer<int2>& buf, int size, Stopwatch& sw ) { Kernel* kernel = KernelManager::query( m_deviceData, "..\\..\\AdlDemos\\TestBed\\Demos\\AdlPrimitivesDemoKernel", "FillInt4Kernel" ); Buffer<int4> constBuffer( m_deviceData, 1, BufferBase::BUFFER_CONST ); int numGroups = (size+128*4-1)/(128*4); Buffer<u32> workBuffer0( m_deviceData, numGroups*(16) ); Buffer<u32> workBuffer1( m_deviceData, numGroups*(16) ); Buffer<int2> sortBuffer( m_deviceData, size ); { int2* host = new int2[size]; for(int i=0; i<size; i++) { host[i] = make_int2( getRandom(0, 0xf), i ); } sortBuffer.write( host, size ); DeviceUtils::waitForCompletion( m_deviceData ); delete [] host; } int4 constData; { constData.x = size; constData.y = 0; constData.z = numGroups; constData.w = 0; } sw.start(); int nThreads = size/4; { BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.split(); { constData.w = 1; int nThreads = size/4; BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.split(); { constData.w = 2; int nThreads = size/4; BufferInfo bInfo[] = { BufferInfo( &sortBuffer ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.stop(); { int2* host = new int2[size]; buf.read( host, size ); DeviceUtils::waitForCompletion( m_deviceData ); for(int i=0; i<128*4-1; i++) { ADLASSERT( 
host[i].x <= host[i+1].x ); } delete [] host; } { float t[3]; sw.getMs(t, 3); // (byte * nElems) sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "LoadStore: %3.2fGB/s (%3.2fns)", (4*8*2)*nThreads/t[0]/1000/1000, t[0]*1000.f); sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "GenHistog: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[1]/1000/1000, t[1]*1000.f); sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "FullSort: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[2]/1000/1000, t[2]*1000.f); } }