// Times a single MyFill::execute() call that fills `buf` with the value
// int2(12, 13) over `size` elements, then appends one formatted result line
// to m_txtBuffer.
//
//   buf  - device buffer handed to MyFill::execute.
//   size - element count passed to MyFill::execute.
//   sw   - stopwatch started/stopped around the execute call only;
//          allocation and deallocation of the fill state are not timed.
void AdlPrimitivesDemo::testFill2( Buffer<int2>& buf, int size, Stopwatch& sw )
{
	MyFill::Data* fillData = MyFill::allocate( m_deviceData );

	sw.start();
	MyFill::execute( fillData, buf, make_int2(12, 13), size );
	sw.stop();

	MyFill::deallocate( fillData );

	// elements / ms / 1000 / 1000 * 8 -> GB/s; the factor 8 is presumably
	// sizeof(int2) in bytes (TODO confirm against the int2 definition).
	const float elapsedMs = sw.getMs();
	sprintf_s(
		m_txtBuffer[m_nTxtLines++],
		LINE_CAPACITY,
		"Fill int2: %3.2fGB/s (%3.2fms)",
		size/elapsedMs/1000/1000*8,
		elapsedMs );
}
// Times a single MySort::execute() call over `size` elements of `buf` and
// writes one formatted result line to m_txtBuffer.
//
// Unlike the other tests in this file, this one resets m_nTxtLines to 0
// before writing, so its line always lands in the first text slot.
//
//   buf  - device buffer of SortData handed to MySort::execute.
//   size - element count; also passed to MySort::allocate.
//   sw   - stopwatch started/stopped around the execute call only.
void AdlPrimitivesDemo::testSort( Buffer<SortData>& buf, int size, Stopwatch& sw )
{
	MySort::Data* sorter = MySort::allocate( m_deviceData, size, RadixSortBase::SORT_ADVANCED );

	sw.start();
	MySort::execute( sorter, buf, size );
	sw.stop();

	MySort::deallocate( sorter );

	// Restart the text output from the first line.
	m_nTxtLines = 0;

	// keys / ms / 1000 -> millions of keys per second.
	const float elapsedMs = sw.getMs();
	sprintf_s(
		m_txtBuffer[m_nTxtLines++],
		LINE_CAPACITY,
		"%d: %3.2fms, %3.2fMKeys/s",
		size,
		elapsedMs,
		size/elapsedMs/1000 );
}
void AdlPrimitivesDemo::test( Buffer<int2>& buf, int size, Stopwatch& sw ) { Kernel* kernel = KernelManager::query( m_deviceData, "..\\..\\AdlDemos\\TestBed\\Demos\\AdlPrimitivesDemoKernel", "FillInt4Kernel" ); Buffer<int4> constBuffer( m_deviceData, 1, BufferBase::BUFFER_CONST ); int numGroups = (size+128*4-1)/(128*4); Buffer<u32> workBuffer0( m_deviceData, numGroups*(16) ); Buffer<u32> workBuffer1( m_deviceData, numGroups*(16) ); Buffer<int2> sortBuffer( m_deviceData, size ); { int2* host = new int2[size]; for(int i=0; i<size; i++) { host[i] = make_int2( getRandom(0, 0xf), i ); } sortBuffer.write( host, size ); DeviceUtils::waitForCompletion( m_deviceData ); delete [] host; } int4 constData; { constData.x = size; constData.y = 0; constData.z = numGroups; constData.w = 0; } sw.start(); int nThreads = size/4; { BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.split(); { constData.w = 1; int nThreads = size/4; BufferInfo bInfo[] = { BufferInfo( &buf ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.split(); { constData.w = 2; int nThreads = size/4; BufferInfo bInfo[] = { BufferInfo( &sortBuffer ), BufferInfo( &workBuffer0 ), BufferInfo( &workBuffer1 ) }; Launcher launcher( m_deviceData, kernel ); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(Launcher::BufferInfo) ); launcher.setConst( constBuffer, constData ); launcher.launch1D( nThreads, 128 ); } sw.stop(); { int2* host = new int2[size]; buf.read( host, size ); DeviceUtils::waitForCompletion( m_deviceData ); for(int i=0; i<128*4-1; i++) { ADLASSERT( 
host[i].x <= host[i+1].x ); } delete [] host; } { float t[3]; sw.getMs(t, 3); // (byte * nElems) sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "LoadStore: %3.2fGB/s (%3.2fns)", (4*8*2)*nThreads/t[0]/1000/1000, t[0]*1000.f); sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "GenHistog: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[1]/1000/1000, t[1]*1000.f); sprintf_s(m_txtBuffer[m_nTxtLines++], LINE_CAPACITY, "FullSort: %3.2fGB/s (%3.2fns)", (4*(8*2+2))*nThreads/t[2]/1000/1000, t[2]*1000.f); } }