int main(int argc, const char * argv[]) { uint32_t maxThreads = GetNumHardwareThreads(); double* avSpeedUps = new double[ maxThreads ]; for( uint32_t numThreads = 1; numThreads <= maxThreads; ++numThreads ) { g_TS.Initialize(numThreads); double avSpeedUp = 0.0; for( int run = 0; run< REPEATS; ++run ) { printf("Run %d.....\n", run); Timer tParallel; tParallel.Start(); ParallelReductionSumTaskSet m_ParallelReductionSumTaskSet( 10 * 1024 * 1024 ); g_TS.AddTaskSetToPipe( &m_ParallelReductionSumTaskSet ); g_TS.WaitforTaskSet( &m_ParallelReductionSumTaskSet ); tParallel.Stop(); printf("Parallel Example complete in \t%fms,\t sum: %" PRIu64 "\n", tParallel.GetTimeMS(), m_ParallelReductionSumTaskSet.m_FinalSum ); Timer tSerial; tSerial.Start(); uint64_t sum = 0; for( uint64_t i = 0; i < (uint64_t)m_ParallelReductionSumTaskSet.m_ParallelSumTaskSet.m_SetSize; ++i ) { sum += i + 1; } tSerial.Stop(); if( run >= WARMUPS ) { avSpeedUp += tSerial.GetTimeMS() / tParallel.GetTimeMS() / RUNS; } printf("Serial Example complete in \t%fms,\t sum: %" PRIu64 "\n", tSerial.GetTimeMS(), sum ); printf("Speed Up Serial / Parallel: %f\n\n", tSerial.GetTimeMS() / tParallel.GetTimeMS() ); } avSpeedUps[numThreads-1] = avSpeedUp; printf("\nAverage Speed Up for %d Hardware Threads Serial / Parallel: %f\n", numThreads, avSpeedUp ); } printf("\nHardware Threads, Av Speed Up/s\n" ); for( uint32_t numThreads = 1; numThreads <= maxThreads; ++numThreads ) { printf("%d, %f\n", numThreads, avSpeedUps[numThreads-1] ); } return 0; }
// Submits the nested m_ParallelSumTaskSet to the global scheduler, blocks until
// it has completed, then adds the `count` of every partial sum onto m_FinalSum.
// Neither `range` nor `threadnum` is read by this implementation.
virtual void ExecuteRange( TaskSetPartition range, uint32_t threadnum )
{
    // Kick off the partial-sum task set and wait for it to finish.
    g_TS.AddTaskSetToPipe( &m_ParallelSumTaskSet );
    g_TS.WaitforTaskSet( &m_ParallelSumTaskSet );

    // Reduce: fold each partial result into the running final sum.
    uint32_t partialIdx = 0;
    while( partialIdx < m_ParallelSumTaskSet.m_NumPartialSums )
    {
        m_FinalSum += m_ParallelSumTaskSet.m_pPartialSums[partialIdx].count;
        ++partialIdx;
    }
}
int main(int argc, const char * argv[]) { Remotery* rmt; rmt_CreateGlobalInstance(&rmt); // Set the callbacks BEFORE initialize or we will get no threadstart nor first waitStart calls g_TS.GetProfilerCallbacks()->threadStart = threadStartCallback; g_TS.GetProfilerCallbacks()->waitStart = waitStartCallback; g_TS.GetProfilerCallbacks()->waitStop = waitStopCallback; g_TS.Initialize(); rmt_SetCurrentThreadName("Main"); double avSpeedUp = 0.0; for( int run = 0; run< RUNS; ++run ) { rmt_ScopedCPUSample(Run); printf("Run %d.....\n", run); ParallelReductionSumTaskSet m_ParallelReductionSumTaskSet( SUMS ); { rmt_ScopedCPUSample(Parallel); m_ParallelReductionSumTaskSet.Init(); g_TS.AddTaskSetToPipe(&m_ParallelReductionSumTaskSet); g_TS.WaitforTaskSet(&m_ParallelReductionSumTaskSet); } volatile uint64_t sum = 0; { rmt_ScopedCPUSample(Serial); for (uint64_t i = 0; i < (uint64_t)m_ParallelReductionSumTaskSet.m_ParallelSumTaskSet.m_SetSize; ++i) { sum += i + 1; } } } rmt_DestroyGlobalInstance(rmt); return 0; }