Пример #1
0
// Validates btMatrix3x3 operator* against the reference implementation
// M3x3mulM1M2_ref on ARRAY_SIZE matrix pairs built from rand_f4(), then
// times both implementations and logs the result.
//
// Returns 0 on success, or -1 after logging both result matrices for the
// first element where the two implementations disagree.
int Test_3x3mulM1M2(void)
{
    // Plain stack arrays; no guard pages are involved in this test.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];   // results of the reference implementation
    btMatrix3x3 out2[ARRAY_SIZE];  // results of the operator under test

    // Init the data and verify each pair as it is generated
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        out2[i] = (in1[i] * in2[i]);

        if( out[i] != out2[i] )
        {
            vlog( "Error - M3x3mulM1M2 result error! ");
            // i is a size_t; cast so the argument really matches %ld.
            vlog( "failure @ %ld\n", (long)i);
            btVector3 m0, m1, m2;
            m0 = out[i].getRow(0);
            m1 = out[i].getRow(1);
            m2 = out[i].getRow(2);

            vlog(   "\ncorrect = (%10.4f, %10.4f, %10.4f, %10.4f) "
                    "\n          (%10.4f, %10.4f, %10.4f, %10.4f) "
                    "\n          (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                    m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                    m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                    m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);

            m0 = out2[i].getRow(0);
            m1 = out2[i].getRow(1);
            m2 = out2[i].getRow(2);

            vlog(   "\ntested  = (%10.4f, %10.4f, %10.4f, %10.4f) "
                    "\n          (%10.4f, %10.4f, %10.4f, %10.4f) "
                    "\n          (%10.4f, %10.4f, %10.4f, %10.4f) \n",
                    m0.m_floats[0], m0.m_floats[1], m0.m_floats[2], m0.m_floats[3],
                    m1.m_floats[0], m1.m_floats[1], m1.m_floats[2], m1.m_floats[3],
                    m2.m_floats[0], m2.m_floats[1], m2.m_floats[2], m2.m_floats[3]);

            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference implementation. bestTime starts at the largest
    // representable value so the first measurement always replaces it.
    bestTime = ~uint64_t(0);
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = M3x3mulM1M2_ref(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report either the best single pass or the average over all passes.
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the operator under test the same way.
    bestTime = ~uint64_t(0);
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++)
    {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = (in1[i] * in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t    scalar\t    vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );

    return 0;
}
Пример #2
0
// Validates btMatrix3x3::getRotation against the reference implementation
// M3x3getRot_ref on ARRAY_SIZE matrices built from rand_f4(), then times
// both implementations and logs the result.
//
// Returns 0 on success, or -1 after logging both quaternions for the first
// element where the two implementations disagree.
int Test_3x3getRot(void)
{
	// Plain stack arrays; no guard pages are involved in this test.
	btMatrix3x3 in1[ARRAY_SIZE];
	btQuaternion out[ARRAY_SIZE];   // results of the reference implementation
	btQuaternion out2[ARRAY_SIZE];  // results of the method under test

	// Init the data and verify each matrix as it is generated
	size_t i, j;
	for (i = 0; i < ARRAY_SIZE; i++)
	{
		in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4());
		// Pre-fill both outputs from qtNAN_f4() before they are overwritten.
		out[i] = btQuaternion(qtNAN_f4());
		out2[i] = btQuaternion(qtNAN_f4());

		M3x3getRot_ref(in1[i], out[i]);
		in1[i].getRotation(out2[i]);

		if (out[i] != out2[i])
		{
			vlog("Error - M3x3getRot result error! ");
			// i is a size_t; cast so the argument really matches %ld.
			vlog("failure @ %ld\n", (long)i);
			vlog(
				"\ncorrect = (%10.7f, %10.7f, %10.7f, %10.7f) "
				"\ntested  = (%10.7f, %10.7f, %10.7f, %10.7f) \n",
				out[i].x(), out[i].y(), out[i].z(), out[i].w(),
				out2[i].x(), out2[i].y(), out2[i].z(), out2[i].w());

			return -1;
		}
	}

	uint64_t scalarTime, vectorTime;
	uint64_t startTime, bestTime, currentTime;

	// Time the reference implementation. bestTime starts with all bits set
	// (the largest value) without reading the still-uninitialized variable.
	bestTime = ~uint64_t(0);
	scalarTime = 0;
	for (j = 0; j < LOOPCOUNT; j++)
	{
		startTime = ReadTicks();
		for (i = 0; i < ARRAY_SIZE; i++)
			M3x3getRot_ref(in1[i], out[i]);
		currentTime = ReadTicks() - startTime;
		scalarTime += currentTime;
		if (currentTime < bestTime)
			bestTime = currentTime;
	}
	// Report either the best single pass or the average over all passes.
	if (0 == gReportAverageTimes)
		scalarTime = bestTime;
	else
		scalarTime /= LOOPCOUNT;

	// Time the method under test the same way.
	bestTime = ~uint64_t(0);
	vectorTime = 0;
	for (j = 0; j < LOOPCOUNT; j++)
	{
		startTime = ReadTicks();
		for (i = 0; i < ARRAY_SIZE; i++)
		{
			in1[i].getRotation(out2[i]);
		}
		currentTime = ReadTicks() - startTime;
		vectorTime += currentTime;
		if (currentTime < bestTime)
			bestTime = currentTime;
	}
	if (0 == gReportAverageTimes)
		vectorTime = bestTime;
	else
		vectorTime /= LOOPCOUNT;

	vlog("Timing:\n");
	vlog("\t    scalar\t    vector\n");
	vlog("\t%10.2f\t%10.2f\n", TicksToCycles(scalarTime) / ARRAY_SIZE, TicksToCycles(vectorTime) / ARRAY_SIZE);

	return 0;
}
Пример #3
0
int Test_v3norm(void)
{
    btVector3 v1, v2;
   
    float x,y,z,w;
    
    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN;     // w channel NaN
    v1.setValue(x,y,z);
	v1.setW(w);

    v2 = v1; 

    btVector3 correct_res, test_res;
	 
    {
		float vNaN = BT_NAN;
		correct_res.setValue(vNaN, vNaN, vNaN); 
		test_res.setValue(vNaN, vNaN, vNaN);
		correct_res = v3norm_ref(v1);
		test_res = v2.normalize();
	   
		if( fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +  
			fabs(correct_res.m_floats[1] - test_res.m_floats[1]) + 
			fabs(correct_res.m_floats[2] - test_res.m_floats[2]) > FLT_EPSILON * 4)
		{	
			vlog( "Error - v3norm result error! "
					"\ncorrect = (%10.4f, %10.4f, %10.4f) "
					"\ntested  = (%10.4f, %10.4f, %10.4f) \n", 
					correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2], 
					test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2]);
		
			return 1;
		}
	}
    
#define DATA_SIZE LOOPCOUNT

	btVector3 vec3_arr0[DATA_SIZE];
	btVector3 vec3_arr1[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

	{
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
			for( k = 0; k < DATA_SIZE; k++ )
			{
				x = RANDF_01;
				y = RANDF_01;
				z = RANDF_01;
				vec3_arr1[k].setValue(x,y,z);
				vec3_arr1[k].setW(w);
			}

            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
			{
	             vec3_arr0[k] = v3norm_ref(vec3_arr1[k]);
	             vec3_arr0[k+1] = v3norm_ref(vec3_arr1[k+1]);
	             vec3_arr0[k+2] = v3norm_ref(vec3_arr1[k+2]);
	             vec3_arr0[k+3] = v3norm_ref(vec3_arr1[k+3]);
			}
			currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;        
        else
            scalarTime /= NUM_CYCLES;
    }
    
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
			for( k = 0; k < DATA_SIZE; k++ )
			{
				x = RANDF_01;
				y = RANDF_01;
				z = RANDF_01;
				vec3_arr1[k].setValue(x,y,z);
				vec3_arr1[k].setW(w);
			}

            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
			{
				vec3_arr0[k] = vec3_arr1[k].normalize();
				vec3_arr0[k+1] = vec3_arr1[k+1].normalize();
				vec3_arr0[k+2] = vec3_arr1[k+2].normalize();
				vec3_arr0[k+3] = vec3_arr1[k+3].normalize();
			}
			currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;        
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, 
									TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
Пример #4
0
int Test_mindot(void)
{
	// Init an array flanked by guard pages
	btSimdFloat4 *data = (btSimdFloat4 *)GuardCalloc(1, MAX_SIZE * sizeof(btSimdFloat4), NULL);
	float *fp = (float *)data;
	long correct, test;
	btVector3 localScaling(0.1f, 0.2f, 0.3f);
	size_t size;

	// Init the data
	size_t i;
	for (i = 0; i < MAX_SIZE; i++)
	{
		fp[4 * i] = (int32_t)RANDF_16;
		fp[4 * i + 1] = (int32_t)RANDF_16;
		fp[4 * i + 2] = (int32_t)RANDF_16;
		fp[4 * i + 3] = BT_NAN;  // w channel NaN
	}

	float correctDot, testDot;
	fp = (float *)localScaling;
	float maxRelativeError = 0.f;

	for (size = 1; size <= MAX_SIZE; size++)
	{
		float *in = (float *)(data + MAX_SIZE - size);
		size_t position;

		for (position = 0; position < size; position++)
		{
			float *biggest = in + position * 4;
			float old[4] = {biggest[0], biggest[1], biggest[2], biggest[3]};
			biggest[0] -= LARGE_FLOAT17;
			biggest[1] -= LARGE_FLOAT17;
			biggest[2] -= LARGE_FLOAT17;
			biggest[3] -= LARGE_FLOAT17;

			correctDot = BT_NAN;
			testDot = BT_NAN;
			correct = mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
			test = localScaling.minDot((btVector3 *)in, size, testDot);
			if (test < 0 || test >= size)
			{
				vlog("Error @ %ld: index out of bounds! *%ld vs %ld \n", size, correct, test);
				continue;
			}
			if (correct != test)
			{
				vlog("Error @ %ld: index misreported! *%ld vs %ld  (*%f, %f)\n", size, correct, test,
					 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
					 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
				return 1;
			}
			if (test != position)
			{
				vlog("Biggest not found where it is supposed to be: *%ld vs %ld (*%f, %f)\n", position, test,
					 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2],
					 fp[0] * in[4 * position] + fp[1] * in[4 * position + 1] + fp[2] * in[4 * position + 2]);
				return 1;
			}

			if (correctDot != testDot)
			{
				float relativeError = btFabs((testDot - correctDot) / correctDot);
				if (relativeError > 1e6)
				{
					vlog("Error @ %ld: dotpr misreported! *%f vs %f    (*%f, %f)\n", size, correctDot, testDot,
						 fp[0] * in[4 * correct] + fp[1] * in[4 * correct + 1] + fp[2] * in[4 * correct + 2],
						 fp[0] * in[4 * test] + fp[1] * in[4 * test + 1] + fp[2] * in[4 * test + 2]);
					return 1;
				}
				else
				{
					if (maxRelativeError < relativeError)
					{
						maxRelativeError = relativeError;
					}
				}
			}

			memcpy(biggest, old, 16);
		}
	}

	if (maxRelativeError)
	{
		printf("Warning: relative error = %e\n", maxRelativeError);
	}
	uint64_t scalarTimes[33 + (MAX_LOG2_SIZE - 5)];
	uint64_t vectorTimes[33 + (MAX_LOG2_SIZE - 5)];
	size_t j, k;
	float *in = (float *)data;
	for (size = 1; size <= 32; size++)
	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		scalarTimes[size] = 0;
		for (j = 0; j < 100; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k < LOOPCOUNT; k++)
				correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
			currentTime = ReadTicks() - startTime;
			scalarTimes[size] += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			scalarTimes[size] = bestTime;
		else
			scalarTimes[size] /= 100;
	}

	uint64_t *timep = &scalarTimes[33];
	for (size = 64; size <= MAX_SIZE; size *= 2)
	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		timep[0] = 0;
		for (j = 0; j < 100; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k < LOOPCOUNT; k++)
				correct += mindot_ref((btSimdFloat4 *)in, (float *)&localScaling, size, &correctDot);
			currentTime = ReadTicks() - startTime;
			timep[0] += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			timep[0] = bestTime;
		else
			timep[0] /= 100;

		timep++;
	}

	for (size = 1; size <= 32; size++)
	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		vectorTimes[size] = 0;
		for (j = 0; j < 100; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k < LOOPCOUNT; k++)
				test += localScaling.minDot((btVector3 *)in, size, testDot);
			currentTime = ReadTicks() - startTime;
			vectorTimes[size] += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			vectorTimes[size] = bestTime;
		else
			vectorTimes[size] /= 100;
	}

	timep = &vectorTimes[33];
	for (size = 64; size <= MAX_SIZE; size *= 2)
	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		timep[0] = 0;
		for (j = 0; j < 100; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k < LOOPCOUNT; k++)
				test += localScaling.minDot((btVector3 *)in, size, testDot);
			currentTime = ReadTicks() - startTime;
			timep[0] += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			timep[0] = bestTime;
		else
			timep[0] /= 100;

		timep++;
	}

	vlog("Timing:\n");
	vlog(" size\t    scalar\t    vector\n");
	for (size = 1; size <= 32; size++)
		vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[size]) / LOOPCOUNT, TicksToCycles(vectorTimes[size]) / LOOPCOUNT);
	size_t index = 33;
	for (size = 64; size <= MAX_SIZE; size *= 2)
	{
		vlog("%5lu\t%10.2f\t%10.2f\n", size, TicksToCycles(scalarTimes[index]) / LOOPCOUNT, TicksToCycles(vectorTimes[index]) / LOOPCOUNT);
		index++;
	}

	// Useless check to make sure that the timing loops are not optimized away
	if (test != correct)
		vlog("Error: Test != correct: *%ld vs. %ld\n", correct, test);

	GuardFree(data);

	return 0;
}
Пример #5
0
// Compares btVector3::triple with the reference v3triple_ref on one triple
// of vectors whose w components are BT_NAN, then times both over DATA_SIZE
// prepared vectors for NUM_CYCLES passes and logs the result.
//
// Returns 0 on success, or 1 when the two results differ by more than
// FLT_EPSILON * 4.
int Test_v3triple(void)
{
    btVector3 v1, v2, v3;

    float x,y,z,w;

    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN;     // w channel NaN
    v1.setValue(x,y,z);
    v1.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v2.setValue(x,y,z);
    v2.setW(w);

    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    v3.setValue(x,y,z);
    v3.setW(w);

    float correctTriple0, testTriple0;

    {
        // Start both results as NaN before they are overwritten.
        correctTriple0 = w;
        testTriple0 = w;
        // The reference result is the "correct" one and the method under
        // test produces the "test" value, so the log prints them in the
        // same correct-then-tested order as the other tests.
        correctTriple0 = v3triple_ref(v1,v2,v3);
        testTriple0 = v1.triple(v2, v3);

        if( fabsf(correctTriple0 - testTriple0) > FLT_EPSILON * 4 )
        {
            vlog( "Error - v3triple result error! %f != %f \n", correctTriple0, testTriple0);

            return 1;
        }
    }

#define DATA_SIZE 1024

    btVector3 v3_arr1[DATA_SIZE];
    btVector3 v3_arr2[DATA_SIZE];
    btVector3 v3_arr3[DATA_SIZE];
    btScalar  res_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Prepare the timing inputs once; both timed sections reuse them.
    for( k = 0; k < DATA_SIZE; k++ )
    {
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr1[k].setValue(x,y,z);
        v3_arr1[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr2[k].setValue(x,y,z);
        v3_arr2[k].setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v3_arr3[k].setValue(x,y,z);
        v3_arr3[k].setW(w);
    }

    // Time the reference implementation.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                // Wrap the index into the DATA_SIZE-entry arrays; k is a
                // multiple of 4, so k32 .. k32+3 stay in range.
                size_t k32 = (k & (DATA_SIZE-1));
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3triple_ref( v3_arr1[k32], v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // Report either the best single pass or the average over all passes.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Time the method under test the same way.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = uint64_t(-1LL);
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            startTime = ReadTicks();
            for( k = 0; k+4 <= LOOPCOUNT; k+=4 )
            {
                size_t k32 = k & (DATA_SIZE -1);
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]); k32++;
                res_arr[k32] = v3_arr1[k32].triple(v3_arr2[k32], v3_arr3[k32]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
Пример #6
0
// Validates btMatrix3x3::timesTranspose against the free function
// timesTranspose (used here as the reference) on ARRAY_SIZE matrix pairs
// built from rand_f4(), then times both and logs the result.
//
// Returns 0 on success, or -1 after printing the index of the first element
// where the two implementations disagree.
int Test_3x3timesTranspose(void)
{
    // Plain stack arrays; no guard pages are involved in this test.
    btMatrix3x3 in1[ARRAY_SIZE];
    btMatrix3x3 in2[ARRAY_SIZE];
    btMatrix3x3 out[ARRAY_SIZE];   // results of the reference function
    btMatrix3x3 out2[ARRAY_SIZE];  // results of the method under test

    // Init the data and verify each pair as it is generated
    size_t i, j;
    for( i = 0; i < ARRAY_SIZE; i++ )
    {
        in1[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );
        in2[i] = btMatrix3x3(rand_f4(), rand_f4(), rand_f4() );

        out[i] = timesTranspose(in1[i], in2[i]);
        out2[i] = in1[i].timesTranspose(in2[i]);

        if( out[i] != out2[i] )
        {
            // i is a size_t; cast so the argument really matches %ld.
            printf( "failure @ %ld\n", (long)i);
            return -1;
        }
    }

    uint64_t scalarTime, vectorTime;
    uint64_t startTime, bestTime, currentTime;

    // Time the reference function. bestTime starts at the largest
    // representable value so the first measurement always replaces it.
    bestTime = ~uint64_t(0);
    scalarTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out[i] = timesTranspose(in1[i], in2[i]);
        currentTime = ReadTicks() - startTime;
        scalarTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    // Report either the best single pass or the average over all passes.
    if( 0 == gReportAverageTimes )
        scalarTime = bestTime;
    else
        scalarTime /= LOOPCOUNT;

    // Time the method under test; its results go to out2 so the reference
    // results in out are not overwritten.
    bestTime = ~uint64_t(0);
    vectorTime = 0;
    for (j = 0; j < LOOPCOUNT; j++) {
        startTime = ReadTicks();
        for( i = 0; i < ARRAY_SIZE; i++ )
            out2[i] = in1[i].timesTranspose(in2[i]);
        currentTime = ReadTicks() - startTime;
        vectorTime += currentTime;
        if( currentTime < bestTime )
            bestTime = currentTime;
    }
    if( 0 == gReportAverageTimes )
        vectorTime = bestTime;
    else
        vectorTime /= LOOPCOUNT;

    vlog( "Timing:\n" );
    vlog( "\t    scalar\t    vector\n" );
    vlog( "\t%10.2f\t%10.2f\n", TicksToCycles( scalarTime ) / ARRAY_SIZE, TicksToCycles( vectorTime ) / ARRAY_SIZE );

    return 0;
}
Пример #7
0
int Test_btDbvt(void)
{
    btDbvtAabbMm a[DATA_SIZE], b[DATA_SIZE], c[DATA_SIZE];
    btDbvtAabbMm a_ref[DATA_SIZE], b_ref[DATA_SIZE], c_ref[DATA_SIZE];
        
    int i;
        
    bool Intersect_Test_Res[DATA_SIZE], Intersect_Ref_Res[DATA_SIZE];
    int Select_Test_Res[DATA_SIZE], Select_Ref_Res[DATA_SIZE];
    
    
    for (i = 0; i < DATA_SIZE; i++)
    {
        a[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
        a[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
        a[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
        a[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        a[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
        a[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
        a[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
        a[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        b[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
        b[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
        b[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
        b[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        b[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
        b[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
        b[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
        b[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        c[i].tMins().m_floats[0] = (float)rand() / (float)RAND_MAX;
        c[i].tMins().m_floats[1] = (float)rand() / (float)RAND_MAX;
        c[i].tMins().m_floats[2] = (float)rand() / (float)RAND_MAX;
        c[i].tMins().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        c[i].tMaxs().m_floats[0] = (float)rand() / (float)RAND_MAX;
        c[i].tMaxs().m_floats[1] = (float)rand() / (float)RAND_MAX;
        c[i].tMaxs().m_floats[2] = (float)rand() / (float)RAND_MAX;
        c[i].tMaxs().m_floats[3] = (float)rand() / (float)RAND_MAX;
        
        
        a_ref[i].tMins().m_floats[0] = a[i].tMins().m_floats[0];
        a_ref[i].tMins().m_floats[1] = a[i].tMins().m_floats[1];
        a_ref[i].tMins().m_floats[2] = a[i].tMins().m_floats[2];
        a_ref[i].tMins().m_floats[3] = a[i].tMins().m_floats[3];
        
        a_ref[i].tMaxs().m_floats[0] = a[i].tMaxs().m_floats[0];
        a_ref[i].tMaxs().m_floats[1] = a[i].tMaxs().m_floats[1];
        a_ref[i].tMaxs().m_floats[2] = a[i].tMaxs().m_floats[2];
        a_ref[i].tMaxs().m_floats[3] = a[i].tMaxs().m_floats[3];
        
        b_ref[i].tMins().m_floats[0] = b[i].tMins().m_floats[0];
        b_ref[i].tMins().m_floats[1] = b[i].tMins().m_floats[1];
        b_ref[i].tMins().m_floats[2] = b[i].tMins().m_floats[2];
        b_ref[i].tMins().m_floats[3] = b[i].tMins().m_floats[3];
        
        b_ref[i].tMaxs().m_floats[0] = b[i].tMaxs().m_floats[0];
        b_ref[i].tMaxs().m_floats[1] = b[i].tMaxs().m_floats[1];
        b_ref[i].tMaxs().m_floats[2] = b[i].tMaxs().m_floats[2];
        b_ref[i].tMaxs().m_floats[3] = b[i].tMaxs().m_floats[3];
        
        c_ref[i].tMins().m_floats[0] = c[i].tMins().m_floats[0];
        c_ref[i].tMins().m_floats[1] = c[i].tMins().m_floats[1];
        c_ref[i].tMins().m_floats[2] = c[i].tMins().m_floats[2];
        c_ref[i].tMins().m_floats[3] = c[i].tMins().m_floats[3];
        
        c_ref[i].tMaxs().m_floats[0] = c[i].tMaxs().m_floats[0];
        c_ref[i].tMaxs().m_floats[1] = c[i].tMaxs().m_floats[1];
        c_ref[i].tMaxs().m_floats[2] = c[i].tMaxs().m_floats[2];
        c_ref[i].tMaxs().m_floats[3] = c[i].tMaxs().m_floats[3];
        
    }
    
    
#if 1
    for (i = 0; i < DATA_SIZE; i++)
    {
        
        Intersect_Test_Res[i] = Intersect(a[i], b[i]);
        Intersect_Ref_Res[i]  = Intersect_ref(a_ref[i], b_ref[i]);
        
        if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
        {
            printf("Diff on %d\n", i); 
            
            printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
            printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
            printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
            printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
            
            printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
            printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
            printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
            printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);   
        }
    }
#endif    
        
    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j;
        
    
    ////////////////////////////////////
    //
    // Time and Test Intersect
    //
    ////////////////////////////////////
	{
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Intersect_Ref_Res[i]  = Intersect_ref(a_ref[i], b_ref[i]);
            }
            
			currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;        
        else
            scalarTime /= NUM_CYCLES;
    }
    
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Intersect_Test_Res[i] = Intersect(a[i], b[i]);
            }

			currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;        
        else
            vectorTime /= NUM_CYCLES;
    }
    
    vlog( "Intersect Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
    
    //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
    
    for (i = 0; i < DATA_SIZE; i++)
    {
        if(Intersect_Test_Res[i] != Intersect_Ref_Res[i])
        {
            printf("Intersect fail at %d\n", i);
			return 1;
        }
    }
    
    ////////////////////////////////////
    //
    // Time and Test Merge
    //
    ////////////////////////////////////
	{
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Merge_ref(a_ref[i], b_ref[i], c_ref[i]);
            }
            
			currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;        
        else
            scalarTime /= NUM_CYCLES;
    }
    
    
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Merge(a[i], b[i], c[i]);
            }
            
			currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;        
        else
            vectorTime /= NUM_CYCLES;
    }
    
    vlog( "Merge Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
    
    //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
    /*
 c  [0]	float32_t	0.00455523
    [1]	float32_t	0.559712
    [2]	float32_t	0.0795838
    [3]	float32_t	0.10182
    
c_ref
    [0]	float32_t	0.00455523
    [1]	float32_t	0.559712
    [2]	float32_t	0.0795838
    [3]	float32_t	0.552081
    
    
c   [0]	float32_t	0.829904
    [1]	float32_t	0.692891
    [2]	float32_t	0.961654
    [3]	float32_t	0.666956
    
 c_ref
    [0]	float32_t	0.829904
    [1]	float32_t	0.692891
    [2]	float32_t	0.961654
    [3]	float32_t	0.522878
    */
    for (i = 0; i < DATA_SIZE; i++)
    {
        //ignore 4th component because it is not computed in all code-paths
        if( (fabs(c[i].tMaxs().m_floats[0] - c_ref[i].tMaxs().m_floats[0]) > 0.001) || 
           (fabs(c[i].tMaxs().m_floats[1] - c_ref[i].tMaxs().m_floats[1]) > 0.001) || 
           (fabs(c[i].tMaxs().m_floats[2] - c_ref[i].tMaxs().m_floats[2]) > 0.001) || 
          // (fabs(c[i].tMaxs().m_floats[3] - c_ref[i].tMaxs().m_floats[3]) > 0.001) || 
           (fabs(c[i].tMins().m_floats[0] - c_ref[i].tMins().m_floats[0]) > 0.001) || 
           (fabs(c[i].tMins().m_floats[1] - c_ref[i].tMins().m_floats[1]) > 0.001) || 
           (fabs(c[i].tMins().m_floats[2] - c_ref[i].tMins().m_floats[2]) > 0.001)  
          //|| (fabs(c[i].tMins().m_floats[3] - c_ref[i].tMins().m_floats[3]) > 0.001) 
           )
           
        
        //if((c[i].tMaxs().m_floats[0] != c_ref[i].tMaxs().m_floats[0]) || (c[i].tMaxs().m_floats[1] != c_ref[i].tMaxs().m_floats[1]) || (c[i].tMaxs().m_floats[2] != c_ref[i].tMaxs().m_floats[2]) || (c[i].tMaxs().m_floats[3] != c_ref[i].tMaxs().m_floats[3]) || (c[i].tMins().m_floats[0] != c_ref[i].tMins().m_floats[0]) || (c[i].tMins().m_floats[1] != c_ref[i].tMins().m_floats[1]) || (c[i].tMins().m_floats[2] != c_ref[i].tMins().m_floats[2]) || (c[i].tMins().m_floats[3] != c_ref[i].tMins().m_floats[3]))
        {
            printf("Merge fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
            
            printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
            printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
            printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
            printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
            printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
            printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
            
            printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
            printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
            printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
            printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
            printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
            printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
			return 1;

		}
        
    }
    
    ////////////////////////////////////
    //
    // Time and Test Select
    //
    ////////////////////////////////////
	{
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Select_Ref_Res[i]  = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
            }
            
			currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;        
        else
            scalarTime /= NUM_CYCLES;
    }
    
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            
            for (i = 0; i < DATA_SIZE; i++)
            {
                Select_Test_Res[i] = Select(a[i], b[i], c[i]);
            }
            
			currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;        
        else
            vectorTime /= NUM_CYCLES;
    }
    
    vlog( "Select Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );
    
    //printf("scalar = %llu, vector = %llu\n", scalarTime, vectorTime);
    
    for (i = 0; i < DATA_SIZE; i++)
    {
        Select_Ref_Res[i]  = Select_ref(a_ref[i], b_ref[i], c_ref[i]);
        Select_Test_Res[i] = Select(a[i], b[i], c[i]);
        
        if(Select_Test_Res[i] != Select_Ref_Res[i])
        {
            printf("Select fail at %d with test = %d, ref = %d\n", i, Select_Test_Res[i], Select_Ref_Res[i]);
            
            printf("a_mx_f[0] = %.3f, a_mx_f[1] = %.3f, a_mx_f[2] = %.3f, a_mx_f[3] = %.3f\n", a[i].tMaxs().m_floats[0], a[i].tMaxs().m_floats[1], a[i].tMaxs().m_floats[2], a[i].tMaxs().m_floats[3]);
            printf("a_mi_f[0] = %.3f, a_mi_f[1] = %.3f, a_mi_f[2] = %.3f, a_mi_f[3] = %.3f\n", a[i].tMins().m_floats[0], a[i].tMins().m_floats[1], a[i].tMins().m_floats[2], a[i].tMins().m_floats[3]);
            printf("b_mx_f[0] = %.3f, b_mx_f[1] = %.3f, b_mx_f[2] = %.3f, b_mx_f[3] = %.3f\n", b[i].tMaxs().m_floats[0], b[i].tMaxs().m_floats[1], b[i].tMaxs().m_floats[2], b[i].tMaxs().m_floats[3]);
            printf("b_mi_f[0] = %.3f, b_mi_f[1] = %.3f, b_mi_f[2] = %.3f, b_mi_f[3] = %.3f\n", b[i].tMins().m_floats[0], b[i].tMins().m_floats[1], b[i].tMins().m_floats[2], b[i].tMins().m_floats[3]);
            printf("c_mx_f[0] = %.3f, c_mx_f[1] = %.3f, c_mx_f[2] = %.3f, c_mx_f[3] = %.3f\n", c[i].tMaxs().m_floats[0], c[i].tMaxs().m_floats[1], c[i].tMaxs().m_floats[2], c[i].tMaxs().m_floats[3]);
            printf("c_mi_f[0] = %.3f, c_mi_f[1] = %.3f, c_mi_f[2] = %.3f, c_mi_f[3] = %.3f\n", c[i].tMins().m_floats[0], c[i].tMins().m_floats[1], c[i].tMins().m_floats[2], c[i].tMins().m_floats[3]);
            
            printf("a_mx_f_ref[0] = %.3f, a_mx_f_ref[1] = %.3f, a_mx_f_ref[2] = %.3f, a_mx_f_ref[3] = %.3f\n", a_ref[i].tMaxs().m_floats[0], a_ref[i].tMaxs().m_floats[1], a_ref[i].tMaxs().m_floats[2], a_ref[i].tMaxs().m_floats[3]);
            printf("a_mi_f_ref[0] = %.3f, a_mi_f_ref[1] = %.3f, a_mi_f_ref[2] = %.3f, a_mi_f_ref[3] = %.3f\n", a_ref[i].tMins().m_floats[0], a_ref[i].tMins().m_floats[1], a_ref[i].tMins().m_floats[2], a_ref[i].tMins().m_floats[3]);
            printf("b_mx_f_ref[0] = %.3f, b_mx_f_ref[1] = %.3f, b_mx_f_ref[2] = %.3f, b_mx_f_ref[3] = %.3f\n", b_ref[i].tMaxs().m_floats[0], b_ref[i].tMaxs().m_floats[1], b_ref[i].tMaxs().m_floats[2], b_ref[i].tMaxs().m_floats[3]);
            printf("b_mi_f_ref[0] = %.3f, b_mi_f_ref[1] = %.3f, b_mi_f_ref[2] = %.3f, b_mi_f_ref[3] = %.3f\n", b_ref[i].tMins().m_floats[0], b_ref[i].tMins().m_floats[1], b_ref[i].tMins().m_floats[2], b_ref[i].tMins().m_floats[3]);
            printf("c_mx_f_ref[0] = %.3f, c_mx_f_ref[1] = %.3f, c_mx_f_ref[2] = %.3f, c_mx_f_ref[3] = %.3f\n", c_ref[i].tMaxs().m_floats[0], c_ref[i].tMaxs().m_floats[1], c_ref[i].tMaxs().m_floats[2], c_ref[i].tMaxs().m_floats[3]);
            printf("c_mi_f_ref[0] = %.3f, c_mi_f_ref[1] = %.3f, c_mi_f_ref[2] = %.3f, c_mi_f_ref[3] = %.3f\n", c_ref[i].tMins().m_floats[0], c_ref[i].tMins().m_floats[1], c_ref[i].tMins().m_floats[2], c_ref[i].tMins().m_floats[3]);
			return 1;
		}
        
    }
    
    return 0;
}
Пример #8
0
int Test_v3skew(void)
{
    btVector3 v, v1, v2, v3, vt1, vt2, vt3;
   
    float x,y,z,w;
    
    // Init the data
    x = RANDF_01;
    y = RANDF_01;
    z = RANDF_01;
    w = BT_NAN;     // w channel NaN
    v.setValue(x,y,z);
	v.setW(w);

    v1.setValue(w,w,w);
	v1.setW(w);

    vt3 = vt2 = vt1 = v3 = v2 = v1;
	
    {
		v3skew_ref(&v, &v1, &v2, &v3);
		v.getSkewSymmetricMatrix(&vt1, &vt2, &vt3);
	   /*
		if( v1.m_floats[0] != vt1.m_floats[0] || 
			v1.m_floats[1] != vt1.m_floats[1] ||
			v1.m_floats[2] != vt1.m_floats[2] )
		*/
        if(!(v1 == vt1))
        {	
			vlog( "Error - v3skew result error! "
					"\ncorrect v1 = (%10.4f, %10.4f, %10.4f) "
					"\ntested  v1 = (%10.4f, %10.4f, %10.4f) \n", 
					v1.m_floats[0], v1.m_floats[1], v1.m_floats[2], 
					vt1.m_floats[0], vt1.m_floats[1], vt1.m_floats[2]);
		
			return 1;
		}

		/*
        if( v2.m_floats[0] != vt2.m_floats[0] || 
			v2.m_floats[1] != vt2.m_floats[1] ||
			v2.m_floats[2] != vt2.m_floats[2] )
		*/
        if(!(v2 == vt2))
        {	
			vlog( "Error - v3skew result error! "
					"\ncorrect v2 = (%10.4f, %10.4f, %10.4f) "
					"\ntested  v2 = (%10.4f, %10.4f, %10.4f) \n", 
					v2.m_floats[0], v2.m_floats[1], v2.m_floats[2], 
					vt2.m_floats[0], vt2.m_floats[1], vt2.m_floats[2]);
		
			return 1;
		}

		/*
        if( v3.m_floats[0] != vt3.m_floats[0] || 
			v3.m_floats[1] != vt3.m_floats[1] ||
			v3.m_floats[2] != vt3.m_floats[2] )
		*/
        if(!(v3 == vt3))
        {	
			vlog( "Error - v3skew result error! "
					"\ncorrect v3 = (%10.4f, %10.4f, %10.4f) "
					"\ntested  v3 = (%10.4f, %10.4f, %10.4f) \n", 
					v3.m_floats[0], v3.m_floats[1], v3.m_floats[2], 
					vt3.m_floats[0], vt3.m_floats[1], vt3.m_floats[2]);
		
			return 1;
		}
	}
    
#define DATA_SIZE 256

	btVector3 v3_arr0[DATA_SIZE];
	btVector3 v3_arr1[DATA_SIZE];
	btVector3 v3_arr2[DATA_SIZE];
	btVector3 v3_arr3[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

	for( k = 0; k < DATA_SIZE; k++ )
	{
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
		v3_arr0[k].setValue(x,y,z);
		v3_arr0[k].setW(w);

		v3_arr1[k].setValue(w,w,w);
		v3_arr1[k].setW(w);

		v3_arr3[k] = v3_arr2[k] = v3_arr1[k];
	}
    
	{
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
			{
				size_t k32 = (k & (DATA_SIZE-1)); 
                v3skew_ref( &v3_arr0[k32], &v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]); 
			}
			currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;        
        else
            scalarTime /= NUM_CYCLES;
    }
    
    {
        uint64_t startTime, bestTime, currentTime;
        
        bestTime = -1LL;
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++) 
		{
            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
			{
				size_t k32 = (k & (DATA_SIZE -1)); 
                v3_arr0[k32].getSkewSymmetricMatrix(&v3_arr1[k32], &v3_arr2[k32], &v3_arr3[k32]); 
			}
			currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;        
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( "    \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT, TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
Пример #9
0
/*
 * Validates btVector3::lerp against the scalar reference v3lerp_ref and
 * times both implementations.
 *
 * Correctness: for rt = 0.0, 0.1, ..., 1.0 two random vectors are
 * interpolated by both routines; the summed absolute x/y/z difference must
 * not exceed FLT_EPSILON * 4.  On a mismatch both results and rt are logged
 * and 1 is returned.
 *
 * Timing: each routine runs NUM_CYCLES passes of LOOPCOUNT calls on freshly
 * randomised inputs; both times (divided by LOOPCOUNT) are logged.
 *
 * Returns 0 on success, 1 on the first correctness failure.
 */
int Test_v3lerp(void)
{
    btVector3 v1, v2;
    btScalar rt;

    float x,y,z,w;

    float vNaN =BT_NAN;
    w =BT_NAN;     // w channel NaN

    btVector3 correct_res, test_res;

    // Drive rt from an integer counter so that exactly eleven samples are
    // taken and the endpoint rt == 1 is included.  Repeatedly adding 0.1f
    // accumulates rounding error and overshoots 1.0 before the last sample,
    // which silently dropped the endpoint from the test.
    for (int step = 0; step <= 10; step++)
    {
        rt = btScalar(step) / btScalar(10);

        // Poison both result vectors with NaN before each evaluation.
        correct_res.setValue(vNaN, vNaN, vNaN);
        test_res.setValue(vNaN, vNaN, vNaN);

        // Init the data
        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v1.setValue(x,y,z);
        v1.setW(w);

        x = RANDF_01;
        y = RANDF_01;
        z = RANDF_01;
        v2.setValue(x,y,z);
        v2.setW(w);

        correct_res = v3lerp_ref(correct_res, v1, v2, rt);
        test_res = v1.lerp(v2, rt);

        // Written as !(err <= tol): any comparison against NaN is false, so
        // "err > tol" would let a NaN result pass.  The negated form reports
        // NaN as a failure.
        if( !(fabs(correct_res.m_floats[0] - test_res.m_floats[0]) +
                fabs(correct_res.m_floats[1] - test_res.m_floats[1]) +
                fabs(correct_res.m_floats[2] - test_res.m_floats[2]) <= FLT_EPSILON * 4) )
        {
            vlog( "Error - v3lerp result error! "
                  "\ncorrect = (%10.4f, %10.4f, %10.4f) "
                  "\ntested  = (%10.4f, %10.4f, %10.4f) \n"
                  "\n rt=%10.4f",
                  correct_res.m_floats[0], correct_res.m_floats[1], correct_res.m_floats[2],
                  test_res.m_floats[0], test_res.m_floats[1], test_res.m_floats[2], rt);

            return 1;
        }
    }

#define DATA_SIZE LOOPCOUNT

    btVector3 vec3_arr1[DATA_SIZE];
    btVector3 vec3_arr2[DATA_SIZE];
    btScalar  rt_arr[DATA_SIZE];

    uint64_t scalarTime;
    uint64_t vectorTime;
    size_t j, k;

    // Scalar reference timing.  Inputs are re-randomised before every pass,
    // outside the timed region.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = -1LL;    // wraps to the largest uint64_t value
        scalarTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                v3lerp_ref(vec3_arr1[k], vec3_arr1[k], vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            scalarTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        // Report the fastest pass, or the mean when averaging is requested.
        if( 0 == gReportAverageTimes )
            scalarTime = bestTime;
        else
            scalarTime /= NUM_CYCLES;
    }

    // Same measurement for btVector3::lerp.
    {
        uint64_t startTime, bestTime, currentTime;

        bestTime = -1LL;    // wraps to the largest uint64_t value
        vectorTime = 0;
        for (j = 0; j < NUM_CYCLES; j++)
        {
            for( k = 0; k < DATA_SIZE; k++ )
            {
                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr1[k].setValue(x,y,z);
                vec3_arr1[k].setW(w);

                x = RANDF_01;
                y = RANDF_01;
                z = RANDF_01;
                vec3_arr2[k].setValue(x,y,z);
                vec3_arr2[k].setW(w);

                rt_arr[k] = RANDF_01;
            }

            startTime = ReadTicks();
            for( k = 0; k < LOOPCOUNT; k++ )
            {
                vec3_arr1[k] = vec3_arr1[k].lerp(vec3_arr2[k], rt_arr[k]);
            }
            currentTime = ReadTicks() - startTime;
            vectorTime += currentTime;
            if( currentTime < bestTime )
                bestTime = currentTime;
        }
        if( 0 == gReportAverageTimes )
            vectorTime = bestTime;
        else
            vectorTime /= NUM_CYCLES;
    }

    vlog( "Timing:\n" );
    vlog( "     \t    scalar\t    vector\n" );
    vlog( "    \t%10.4f\t%10.4f\n", TicksToCycles( scalarTime ) / LOOPCOUNT,
          TicksToCycles( vectorTime ) / LOOPCOUNT );

    return 0;
}
Пример #10
0
int Test_qtdot(void)
{
	btQuaternion q1, q2;
	float x, y, z, w, vNaN;
	vNaN = BT_NAN;  // w channel NaN

	// Init the data
	x = RANDF_01;
	y = RANDF_01;
	z = RANDF_01;
	w = RANDF_01;
	q1.setValue(x, y, z, w);

	x = RANDF_01;
	y = RANDF_01;
	z = RANDF_01;
	w = RANDF_01;
	q2.setValue(x, y, z, w);

	btScalar correct_res, test_res;

	{
		correct_res = vNaN;
		test_res = vNaN;
		correct_res = qtdot_ref(q1, q2);
		test_res = BT_OP(q1, q2);

		if (fabsf(correct_res - test_res) > FLT_EPSILON * 4)
		{
			vlog(
				"Error - qtdot result error! "
				"\ncorrect = %10.4f "
				"\ntested  = %10.4f \n",
				correct_res, test_res);

			return 1;
		}
	}

#define DATA_SIZE LOOPCOUNT

	btQuaternion qt_arr1[DATA_SIZE];
	btQuaternion qt_arr2[DATA_SIZE];
	btScalar res_arr[DATA_SIZE];

	uint64_t scalarTime;
	uint64_t vectorTime;
	size_t j, k;

	for (k = 0; k < DATA_SIZE; k++)
	{
		x = RANDF_01;
		y = RANDF_01;
		z = RANDF_01;
		w = RANDF_01;
		qt_arr1[k].setValue(x, y, z, w);

		x = RANDF_01;
		y = RANDF_01;
		z = RANDF_01;
		w = RANDF_01;
		qt_arr2[k].setValue(x, y, z, w);
	}

	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		scalarTime = 0;
		for (j = 0; j < NUM_CYCLES; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
			{
				size_t km = (k & (DATA_SIZE - 1));
				res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = qtdot_ref(qt_arr1[km], qt_arr2[km]);
			}
			currentTime = ReadTicks() - startTime;
			scalarTime += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			scalarTime = bestTime;
		else
			scalarTime /= NUM_CYCLES;
	}

	{
		uint64_t startTime, bestTime, currentTime;

		bestTime = -1LL;
		vectorTime = 0;
		for (j = 0; j < NUM_CYCLES; j++)
		{
			startTime = ReadTicks();
			for (k = 0; k + 4 <= LOOPCOUNT; k += 4)
			{
				size_t km = (k & (DATA_SIZE - 1));
				res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
				km++;
				res_arr[km] = BT_OP(qt_arr1[km], qt_arr2[km]);
				km++;
			}
			currentTime = ReadTicks() - startTime;
			vectorTime += currentTime;
			if (currentTime < bestTime)
				bestTime = currentTime;
		}
		if (0 == gReportAverageTimes)
			vectorTime = bestTime;
		else
			vectorTime /= NUM_CYCLES;
	}

	vlog("Timing:\n");
	vlog("     \t    scalar\t    vector\n");
	vlog("    \t%10.4f\t%10.4f\n", TicksToCycles(scalarTime) / LOOPCOUNT,
		 TicksToCycles(vectorTime) / LOOPCOUNT);

	return 0;
}