bool _compareResults(int numBodies) { assert(m_nbodyCuda); bool passed = true; m_nbody->update(0.001f); { m_nbodyCpu = new BodySystemCPU<T>(numBodies); m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos); m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel); m_nbodyCpu->update(0.001f); T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION); T *cpuPos = m_nbodyCpu->getArray(BODYSYSTEM_POSITION); T tolerance = 0.0005f; for (int i = 0; i < numBodies; i++) { if (fabs(cpuPos[i] - cudaPos[i]) > tolerance) { passed = false; printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]); } } } return passed; }
//***************************************************************************** bool CompareResults(int numBodies) { // Run computation on the device/GPU shrLog(" Computing on the Device / GPU...\n"); nbodyGPU->update(0.001f); nbodyGPU->synchronizeThreads(); // Write out device/GPU data file for regression analysis shrLog(" Writing out Device/GPU data file for analysis...\n"); float* fGPUData = nbodyGPU->getArray(BodySystem::BODYSYSTEM_POSITION); shrWriteFilef( "oclNbody_Regression.dat", fGPUData, numBodies, 0.0, false); // Run computation on the host CPU shrLog(" Computing on the Host / CPU...\n\n"); BodySystemCPU* nbodyCPU = new BodySystemCPU(numBodies); nbodyCPU->setArray(BodySystem::BODYSYSTEM_POSITION, hPos); nbodyCPU->setArray(BodySystem::BODYSYSTEM_VELOCITY, hVel); nbodyCPU->update(0.001f); // Check if result matches shrBOOL bMatch = shrComparefe(fGPUData, nbodyGPU->getArray(BodySystem::BODYSYSTEM_POSITION), numBodies, .001f); shrLog("Results %s\n\n", (shrTRUE == bMatch) ? "Match" : "do not match!"); // Cleanup local allocation if(nbodyCPU)delete nbodyCPU; return (shrTRUE == bMatch); }