int Test_3x3mulM1M2(void) { // Init an array flanked by guard pages btMatrix3x3 in1[ARRAY_SIZE]; btMatrix3x3 in2[ARRAY_SIZE]; btMatrix3x3 out[ARRAY_SIZE]; btMatrix3x3 out2[ARRAY_SIZE]; // Init the data size_t i, j; for( i = 0; i < ARRAY_SIZE; i++ ) { in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() ); in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() ); out[i] = M3x3mulM1M2_ref(in1[i], in2[i]); out2[i] = (in1[i] * in2[i]); if( out[i] != out2[i] ) { vlog( "Error - M3x3mulM1M2 result error! "); vlog( "failure @ %ld\n", i); btVector3 m0, m1, m2; m0 = out[i].getRow(0); m1 = out[i].getRow(1); m2 = out[i].getRow(2); vlog( "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) " "\n (%10.4f, %10.4f, %10.4f, %10.4f) " "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n", m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3], m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3], m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]); m0 = out2[i].getRow(0); m1 = out2[i].getRow(1); m2 = out2[i].getRow(2); vlog( "\ntested = (%10.4f, %10.4f, %10.4f, %10.4f) " "\n (%10.4f, %10.4f, %10.4f, %10.4f) " "\n (%10.4f, %10.4f, %10.4f, %10.4f) \n", m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3], m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3], m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]); return -1; } } uint64_t scalarTime, vectorTime; uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for( i = 0; i < ARRAY_SIZE; i++ ) out[i] = M3x3mulM1M2_ref(in1[i], in2[i]); currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= LOOPCOUNT; bestTime = -1LL; vectorTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for( i = 0; i < ARRAY_SIZE; i++ ) out2[i] = (in1[i] * in2[i]); currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= LOOPCOUNT; vlog( "Timing:\n" ); vlog( "\t scalar\t vector\n" ); vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE ); return 0; }
int Test_3x3getRot(void) { // Init an array flanked by guard pages btMatrix3x3 in1[ARRAY_SIZE]; btQuaternion out[ARRAY_SIZE]; btQuaternion out2[ARRAY_SIZE]; // Init the data size_t i, j; for (i = 0; i < ARRAY_SIZE; i++) { in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4()); out[i] = btQuaternion(qtNAN_f4()); out2[i] = btQuaternion(qtNAN_f4()); M3x3getRot_ref(in1[i], out[i]); in1[i].getRotation(out2[i]); if (out[i] != out2[i]) { vlog("Error - M3x3getRot result error! "); vlog("failure @ %ld\n", i); vlog( "\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) " "\ntested = (%10.7f, %10.7f, %10.7f, %10.7f) \n", out[i].x(), out[i].y(), out[i].z(), out[i].w(), out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w()); return -1; } } uint64_t scalarTime, vectorTime; uint64_t startTime, bestTime, currentTime; bestTime = ~(bestTime & 0); //-1ULL; scalarTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for (i = 0; i < ARRAY_SIZE; i++) M3x3getRot_ref(in1[i], out[i]); currentTime = ReadTicks() - startTime; scalarTime += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) scalarTime = bestTime; else scalarTime /= LOOPCOUNT; bestTime = ~(bestTime & 0); //-1ULL; vectorTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for (i = 0; i < ARRAY_SIZE; i++) { in1[i].getRotation(out2[i]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) vectorTime = bestTime; else vectorTime /= LOOPCOUNT; vlog("Timing:\n"); vlog("\t scalar\t vector\n"); vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE); return 0; }
int Test_v3norm(void) { btVector3 v1, v2; float x,y,z,w; // Init the data x = RANDF_01; y = RANDF_01; z = RANDF_01; w = BT_NAN; // w channel NaN v1.setValue(x,y,z); v1.setW(w); v2 = v1; btVector3 correct_res, test_res; { float vNaN = BT_NAN; correct_res.setValue(vNaN, vNaN, vNaN); test_res.setValue(vNaN, vNaN, vNaN); correct_res = v3norm_ref(v1); test_res = v2.normalize(); if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) + fabs(correct_res.m_floats[1] - test_res.m_floats[1]) + fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4) { vlog( "Error - v3norm result error! " "\ncorrect = (%10.4f, %10.4f, %10.4f) " "\ntested = (%10.4f, %10.4f, %10.4f) \n", correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2], test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]); return 1; } } #define DATA_SIZE LOOPCOUNT btVector3 vec3_arr0[DATA_SIZE]; btVector3 vec3_arr1[DATA_SIZE]; uint64_t scalarTime; uint64_t vectorTime; size_t j, k; { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr1[k].setValue(x,y,z); vec3_arr1[k].setW(w); } startTime = ReadTicks(); for( k = 0; k+4 <= LOOPCOUNT; k+=4 ) { vec3_arr0[k] = v3norm_ref(vec3_arr1[k]); vec3_arr0[k+1] = v3norm_ref(vec3_arr1[k+1]); vec3_arr0[k+2] = v3norm_ref(vec3_arr1[k+2]); vec3_arr0[k+3] = v3norm_ref(vec3_arr1[k+3]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr1[k].setValue(x,y,z); vec3_arr1[k].setW(w); } startTime = ReadTicks(); for( k = 0; k+4 <= LOOPCOUNT; k+=4 ) { vec3_arr0[k] = vec3_arr1[k].normalize(); vec3_arr0[k+1] = vec3_arr1[k+1].normalize(); vec3_arr0[k+2] = vec3_arr1[k+2].normalize(); vec3_arr0[k+3] = vec3_arr1[k+3].normalize(); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); return 0; }
int Test_mindot(void) { // Init an array flanked by guard pages btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL); float *fp = (float *)data; long correct, test; btVector3 localScaling(0.1f, 0.2f, 0.3f); size_t size; // Init the data size_t i; for (i = 0; i < MAX_SIZE; i++) { fp[4 * i] = (int32_t)RANDF_16; fp[4 * i + 1] = (int32_t)RANDF_16; fp[4 * i + 2] = (int32_t)RANDF_16; fp[4 * i + 3] = BT_NAN; // w channel NaN } float correctDot, testDot; fp = (float *)localScaling; float maxRelativeError = 0.f; for (size = 1; size <= MAX_SIZE; size++) { float *in = (float *)(data + MAX_SIZE - size); size_t position; for (position = 0; position < size; position++) { float *biggest = in + position * 4; float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]}; biggest[0] -= LARGE_FLOAT17; biggest[1] -= LARGE_FLOAT17; biggest[2] -= LARGE_FLOAT17; biggest[3] -= LARGE_FLOAT17; correctDot = BT_NAN; testDot = BT_NAN; correct = mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot); test = localScaling.minDot((btVector3 *)in, size, testDot); if (test < 0 || test >= size) { vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test); continue; } if (correct != test) { vlog("Error @ %ld: index misreported! *%ld vs %ld (*%f, %f)\n", size, correct, test, fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2], fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]); return 1; } if (test != position) { vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test, fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2], fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]); return 1; } if (correctDot != testDot) { float relativeError = btFabs((testDot - correctDot) / correctDot); if (relativeError > 1e6) { vlog("Error @ %ld: dotpr misreported! *%f vs %f (*%f, %f)\n", size, correctDot, testDot, fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2], fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]); return 1; } else { if (maxRelativeError < relativeError) { maxRelativeError = relativeError; } } } memcpy(biggest, old, 16); } } if (maxRelativeError) { printf("Warning: relative error = %e\n", maxRelativeError); } uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)]; uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)]; size_t j, k; float *in = (float *)data; for (size = 1; size <= 32; size++) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTimes[size] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for (k = 0; k < LOOPCOUNT; k++) correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot); currentTime = ReadTicks() - startTime; scalarTimes[size] += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) scalarTimes[size] = bestTime; else scalarTimes[size] /= 100; } uint64_t *timep = &scalarTimes[33]; for (size = 64; size <= MAX_SIZE; size *= 2) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; timep[0] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for (k = 0; k < LOOPCOUNT; k++) correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot); currentTime = ReadTicks() - startTime; timep[0] += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) timep[0] = bestTime; else timep[0] /= 100; timep++; } for (size = 1; size <= 32; size++) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTimes[size] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for (k = 0; k < LOOPCOUNT; k++) test += localScaling.minDot((btVector3 *)in, size, testDot); currentTime = ReadTicks() - startTime; vectorTimes[size] += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) vectorTimes[size] = bestTime; else vectorTimes[size] /= 100; } timep = &vectorTimes[33]; for (size = 64; size <= MAX_SIZE; size *= 2) { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; timep[0] = 0; for (j = 0; j < 100; j++) { startTime = ReadTicks(); for (k = 0; k < LOOPCOUNT; k++) test += localScaling.minDot((btVector3 *)in, size, testDot); currentTime = ReadTicks() - startTime; timep[0] += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) timep[0] = bestTime; else timep[0] /= 100; timep++; } vlog("Timing:\n"); vlog(" size\t scalar\t vector\n"); for (size = 1; size <= 32; size++) vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT); size_t index = 33; for (size = 64; size <= MAX_SIZE; size *= 2) { vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT); index++; } // Useless check to make sure that the timing loops are not optimized away if (test != correct) vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test); GuardFree(data); return 0; }
int Test_v3triple(void) { btVector3 v1, v2, v3; float x,y,z,w; // Init the data x = RANDF_01; y = RANDF_01; z = RANDF_01; w = BT_NAN; // w channel NaN v1.setValue(x,y,z); v1.setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; v2.setValue(x,y,z); v2.setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; v3.setValue(x,y,z); v3.setW(w); float correctTriple0, testTriple0; { correctTriple0 = w; testTriple0 = w; testTriple0 = v3triple_ref(v1,v2,v3); correctTriple0 = v1.triple(v2, v3); if( fabsf(correctTriple0 - testTriple0) > FLT_EPSILON * 4 ) { vlog( "Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0); return 1; } } #define DATA_SIZE 1024 btVector3 v3_arr1[DATA_SIZE]; btVector3 v3_arr2[DATA_SIZE]; btVector3 v3_arr3[DATA_SIZE]; btScalar res_arr[DATA_SIZE]; uint64_t scalarTime; uint64_t vectorTime; size_t j, k; for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; v3_arr1[k].setValue(x,y,z); v3_arr1[k].setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; v3_arr2[k].setValue(x,y,z); v3_arr2[k].setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; v3_arr3[k].setValue(x,y,z); v3_arr3[k].setW(w); } { uint64_t startTime, bestTime, currentTime; bestTime = uint64_t(-1LL); scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for( k = 0; k+4 <= LOOPCOUNT; k+=4 ) { size_t k32 = (k & (DATA_SIZE-1)); res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = uint64_t(-1LL); vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for( k = 0; k+4 <= LOOPCOUNT; k+=4 ) { size_t k32 = k & (DATA_SIZE -1); res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++; res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); return 0; }
int Test_3x3timesTranspose(void) { // Init an array flanked by guard pages btMatrix3x3 in1[ARRAY_SIZE]; btMatrix3x3 in2[ARRAY_SIZE]; btMatrix3x3 out[ARRAY_SIZE]; btMatrix3x3 out2[ARRAY_SIZE]; // Init the data size_t i, j; for( i = 0; i < ARRAY_SIZE; i++ ) { in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() ); in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() ); out[i] = timesTranspose(in1[i], in2[i]); out2[i] = in1[i].timesTranspose(in2[i]); if( out[i] != out2[i] ) { printf( "failure @ %ld\n", i); return -1; } } uint64_t scalarTime, vectorTime; uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for( i = 0; i < ARRAY_SIZE; i++ ) out[i] = timesTranspose(in1[i], in2[i]); currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= LOOPCOUNT; bestTime = -1LL; vectorTime = 0; for (j = 0; j < LOOPCOUNT; j++) { startTime = ReadTicks(); for( i = 0; i < ARRAY_SIZE; i++ ) out[i] = in1[i].timesTranspose(in2[i]); currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= LOOPCOUNT; vlog( "Timing:\n" ); vlog( "\t scalar\t vector\n" ); vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE ); return 0; }
int Test_btDbvt(void) { btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE]; btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE]; int i; bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE]; int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE]; for (i = 0; i < DATA_SIZE; i++) { a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX; a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX; a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX; a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX; a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX; a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX; a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX; a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX; b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX; b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX; b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX; b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX; b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX; b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX; b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX; b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX; c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX; c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX; c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX; c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX; c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX; c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX; c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX; c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX; a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0]; a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1]; a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2]; a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3]; a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0]; a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1]; a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2]; a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3]; b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0]; b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1]; b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2]; b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3]; b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0]; b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1]; b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2]; b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3]; c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0]; c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1]; c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2]; c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3]; c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0]; c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1]; c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2]; c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3]; } #if 1 for (i = 0; i < DATA_SIZE; i++) { Intersect_Test_Res[i] = Intersect(a[i], b[i]); Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]); if(Intersect_Test_Res[i] != Intersect_Ref_Res[i]) { printf("Diff on %d\n", i); printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]); printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]); printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]); printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]); printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]); printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]); printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]); printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]); } } #endif uint64_t scalarTime; uint64_t vectorTime; size_t j; //////////////////////////////////// // // Time and Test Intersect // //////////////////////////////////// { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Intersect_Ref_Res[i] = Intersect_ref(a_ref[i], b_ref[i]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Intersect_Test_Res[i] = Intersect(a[i], b[i]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Intersect Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime); for (i = 0; i < DATA_SIZE; i++) { if(Intersect_Test_Res[i] != Intersect_Ref_Res[i]) { printf("Intersect fail at %d\n", i); return 1; } } //////////////////////////////////// // // Time and Test Merge // //////////////////////////////////// { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Merge_ref(a_ref[i], b_ref[i], c_ref[i]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Merge(a[i], b[i], c[i]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Merge Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime); /* c [0] float32_t 0.00455523 [1] float32_t 0.559712 [2] float32_t 0.0795838 [3] float32_t 0.10182 c_ref [0] float32_t 0.00455523 [1] float32_t 0.559712 [2] float32_t 0.0795838 [3] float32_t 0.552081 c [0] float32_t 0.829904 [1] float32_t 0.692891 [2] float32_t 0.961654 [3] float32_t 0.666956 c_ref [0] float32_t 0.829904 [1] float32_t 0.692891 [2] float32_t 0.961654 [3] float32_t 0.522878 */ for (i = 0; i < DATA_SIZE; i++) { //ignore 4th component because it is not computed in all code-paths if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) || (fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) || (fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) || // (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) || (fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) || (fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) || (fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001) //|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001) ) //if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3])) { printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]); printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]); printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]); printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]); printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]); printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]); printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]); printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]); printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]); printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]); printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]); printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]); printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]); return 1; } } //////////////////////////////////// // // Time and Test Select // //////////////////////////////////// { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (i = 0; i < DATA_SIZE; i++) { Select_Test_Res[i] = Select(a[i], b[i], c[i]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Select Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime); for (i = 0; i < DATA_SIZE; i++) { Select_Ref_Res[i] = Select_ref(a_ref[i], b_ref[i], c_ref[i]); Select_Test_Res[i] = Select(a[i], b[i], c[i]); if(Select_Test_Res[i] != Select_Ref_Res[i]) { printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]); printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]); printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]); printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]); printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]); printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]); printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]); printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]); printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]); printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]); printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]); printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]); printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]); return 1; } } return 0; }
int Test_v3skew(void) { btVector3 v, v1, v2, v3, vt1, vt2, vt3; float x,y,z,w; // Init the data x = RANDF_01; y = RANDF_01; z = RANDF_01; w = BT_NAN; // w channel NaN v.setValue(x,y,z); v.setW(w); v1.setValue(w,w,w); v1.setW(w); vt3 = vt2 = vt1 = v3 = v2 = v1; { v3skew_ref(&v, &v1, &v2, &v3); v.getSkewSymmetricMatrix(&vt1, &vt2, &vt3); /* if( v1.m_floats[0] != vt1.m_floats[0] || v1.m_floats[1] != vt1.m_floats[1] || v1.m_floats[2] != vt1.m_floats[2] ) */ if(!(v1 == vt1)) { vlog( "Error - v3skew result error! " "\ncorrect v1 = (%10.4f, %10.4f, %10.4f) " "\ntested v1 = (%10.4f, %10.4f, %10.4f) \n", v1.m_floats[0], v1.m_floats[1], v1.m_floats[2], vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]); return 1; } /* if( v2.m_floats[0] != vt2.m_floats[0] || v2.m_floats[1] != vt2.m_floats[1] || v2.m_floats[2] != vt2.m_floats[2] ) */ if(!(v2 == vt2)) { vlog( "Error - v3skew result error! " "\ncorrect v2 = (%10.4f, %10.4f, %10.4f) " "\ntested v2 = (%10.4f, %10.4f, %10.4f) \n", v2.m_floats[0], v2.m_floats[1], v2.m_floats[2], vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]); return 1; } /* if( v3.m_floats[0] != vt3.m_floats[0] || v3.m_floats[1] != vt3.m_floats[1] || v3.m_floats[2] != vt3.m_floats[2] ) */ if(!(v3 == vt3)) { vlog( "Error - v3skew result error! " "\ncorrect v3 = (%10.4f, %10.4f, %10.4f) " "\ntested v3 = (%10.4f, %10.4f, %10.4f) \n", v3.m_floats[0], v3.m_floats[1], v3.m_floats[2], vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]); return 1; } } #define DATA_SIZE 256 btVector3 v3_arr0[DATA_SIZE]; btVector3 v3_arr1[DATA_SIZE]; btVector3 v3_arr2[DATA_SIZE]; btVector3 v3_arr3[DATA_SIZE]; uint64_t scalarTime; uint64_t vectorTime; size_t j, k; for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; v3_arr0[k].setValue(x,y,z); v3_arr0[k].setW(w); v3_arr1[k].setValue(w,w,w); v3_arr1[k].setW(w); v3_arr3[k] = v3_arr2[k] = v3_arr1[k]; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) { size_t k32 = (k & (DATA_SIZE-1)); v3skew_ref( &v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) { size_t k32 = (k & (DATA_SIZE -1)); v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); return 0; }
int Test_v3lerp(void) { btVector3 v1, v2; btScalar rt; float x,y,z,w; float vNaN =BT_NAN; w =BT_NAN; // w channel NaN btVector3 correct_res, test_res; for (rt = 0.0f; rt <= 1.0f; rt += 0.1f) { correct_res.setValue(vNaN, vNaN, vNaN); test_res.setValue(vNaN, vNaN, vNaN); // Init the data x = RANDF_01; y = RANDF_01; z = RANDF_01; v1.setValue(x,y,z); v1.setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; v2.setValue(x,y,z); v2.setW(w); correct_res = v3lerp_ref(correct_res, v1, v2, rt); test_res = v1.lerp(v2, rt); if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) + fabs(correct_res.m_floats[1] - test_res.m_floats[1]) + fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4) { vlog( "Error - v3lerp result error! " "\ncorrect = (%10.4f, %10.4f, %10.4f) " "\ntested = (%10.4f, %10.4f, %10.4f) \n" "\n rt=%10.4f", correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2], test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt); return 1; } } #define DATA_SIZE LOOPCOUNT btVector3 vec3_arr1[DATA_SIZE]; btVector3 vec3_arr2[DATA_SIZE]; btScalar rt_arr[DATA_SIZE]; uint64_t scalarTime; uint64_t vectorTime; size_t j, k; { uint64_t startTime, bestTime, currentTime; w =BT_NAN; // w channel NaN bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr1[k].setValue(x,y,z); vec3_arr1[k].setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr2[k].setValue(x,y,z); vec3_arr2[k].setW(w); rt_arr[k] = RANDF_01; } startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) { v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { for( k = 0; k < DATA_SIZE; k++ ) { x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr1[k].setValue(x,y,z); vec3_arr1[k].setW(w); x = RANDF_01; y = RANDF_01; z = RANDF_01; vec3_arr2[k].setValue(x,y,z); vec3_arr2[k].setW(w); rt_arr[k] = RANDF_01; } startTime = ReadTicks(); for( k = 0; k < LOOPCOUNT; k++ ) { vec3_arr1[k] = vec3_arr1[k].lerp(vec3_arr2[k], rt_arr[k]); } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if( currentTime < bestTime ) bestTime = currentTime; } if( 0 == gReportAverageTimes ) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog( "Timing:\n" ); vlog( " \t scalar\t vector\n" ); vlog( " \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT ); return 0; }
int Test_qtdot(void) { btQuaternion q1, q2; float x, y, z, w, vNaN; vNaN = BT_NAN; // w channel NaN // Init the data x = RANDF_01; y = RANDF_01; z = RANDF_01; w = RANDF_01; q1.setValue(x, y, z, w); x = RANDF_01; y = RANDF_01; z = RANDF_01; w = RANDF_01; q2.setValue(x, y, z, w); btScalar correct_res, test_res; { correct_res = vNaN; test_res = vNaN; correct_res = qtdot_ref(q1, q2); test_res = BT_OP(q1, q2); if (fabsf(correct_res - test_res) > FLT_EPSILON * 4) { vlog( "Error - qtdot result error! " "\ncorrect = %10.4f " "\ntested = %10.4f \n", correct_res, test_res); return 1; } } #define DATA_SIZE LOOPCOUNT btQuaternion qt_arr1[DATA_SIZE]; btQuaternion qt_arr2[DATA_SIZE]; btScalar res_arr[DATA_SIZE]; uint64_t scalarTime; uint64_t vectorTime; size_t j, k; for (k = 0; k < DATA_SIZE; k++) { x = RANDF_01; y = RANDF_01; z = RANDF_01; w = RANDF_01; qt_arr1[k].setValue(x, y, z, w); x = RANDF_01; y = RANDF_01; z = RANDF_01; w = RANDF_01; qt_arr2[k].setValue(x, y, z, w); } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; scalarTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (k = 0; k + 4 <= LOOPCOUNT; k += 4) { size_t km = (k & (DATA_SIZE - 1)); res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]); } currentTime = ReadTicks() - startTime; scalarTime += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) scalarTime = bestTime; else scalarTime /= NUM_CYCLES; } { uint64_t startTime, bestTime, currentTime; bestTime = -1LL; vectorTime = 0; for (j = 0; j < NUM_CYCLES; j++) { startTime = ReadTicks(); for (k = 0; k + 4 <= LOOPCOUNT; k += 4) { size_t km = (k & (DATA_SIZE - 1)); res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]); km++; res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]); km++; } currentTime = ReadTicks() - startTime; vectorTime += currentTime; if (currentTime < bestTime) bestTime = currentTime; } if (0 == gReportAverageTimes) vectorTime = bestTime; else vectorTime /= NUM_CYCLES; } vlog("Timing:\n"); vlog(" \t scalar\t vector\n"); vlog(" \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT, TicksToCycles(vectorTime) / LOOPCOUNT); return 0; }