コード例 #1
0
ファイル: vrml_env.c プロジェクト: DimondTheCat/xray
/*
 * Emit the values of a three-channel envelope set at the current step and
 * advance the per-channel bookkeeping to the following step.
 *
 *   env  - three envelopes, one per channel
 *   k    - per-channel "pending keyframe" cursor; a cursor is replaced by
 *          envInfo->nextKey() once its key has been consumed and becomes
 *          null when no further key is returned
 *   step - per-channel step of the pending key; updated in place
 *   val  - receives the three channel values for the current step
 *
 * Returns the step that was processed, i.e. step[MINDEX(step)] as it was on
 * entry.
 *
 * NOTE(review): keyValue() and keyStep() are handed the whole env array
 * rather than env[chan]; confirm that this is what those helpers expect.
 */
int itemNextStep(LWEnvelopeID *env, LWEnvKeyframeID *k, int *step, LWDVector val)
{
	int	chan, now, upcoming;
	double	seconds;

	/* MINDEX(step) picks the channel whose step is handled by this call
	 * (presumably the smallest one -- TODO confirm against the macro). */
	now = step[MINDEX(step)];
	upcoming = now;
	seconds = (double)now / sceneInfo->framesPerSecond;

	for(chan = 0; chan < 3; chan++)
	{
		/* No key of this channel sits on the current step: sample the
		 * envelope at the corresponding time instead. */
		if(step[chan] != now || !k[chan])
		{
			val[chan] = envInfo->evaluate(env[chan], seconds);
			continue;
		}

		/* A key sits exactly on this step: use its value, then move the
		 * cursor on to the channel's next key. */
		val[chan] = keyValue(env, k[chan]);
		k[chan] = envInfo->nextKey(env[chan], k[chan]);
		if(k[chan])
		{
			step[chan] = keyStep(env, k[chan]);
			/* Track the earliest following step among the channels
			 * that advanced; the first one to advance seeds it. */
			upcoming = upcoming > now ? MIN(upcoming, step[chan]) : step[chan];
		}
	}

	/* Channels without a pending key simply follow the others. */
	for(chan = 0; chan < 3; chan++)
		if(!k[chan])
			step[chan] = upcoming;

	return now;
}
コード例 #2
0
ファイル: matvec.c プロジェクト: tierex/uppsala_university
/**
 * Reference implementation of the matrix vector multiply
 * algorithm. Used to verify the answer. Do NOT change this function.
 *
 * For every i in [0, SIZE) the products mat_a[MINDEX(i, j)] * vec_b[j]
 * for all j in [0, SIZE) are added onto vec_ref[i]. Because the result
 * is accumulated with +=, vec_ref must already hold zeros in all SIZE
 * elements when this function is called.
 */
static void
matvec_ref()
{
        int i, j;

	/* i selects the output element, j walks the terms summed into it.
	 * MINDEX(i, j) maps the pair to a flat index into mat_a
	 * (presumably row-major -- confirm against the macro definition). */
	for (i = 0; i < SIZE; i++)
                for (j = 0; j < SIZE; j++)
                        vec_ref[i] += mat_a[MINDEX(i, j)] * vec_b[j];
}
コード例 #3
0
ファイル: matvec.c プロジェクト: tierex/uppsala_university
static void
matvec_sse()
{
        /* Assume that the data size is an even multiple of the 128 bit
         * SSE vectors (i.e. 4 floats) */
        assert(!(SIZE & 0x3));

        /* TASK: Implement your SSE version of the matrix-vector
         * multiplication here.
         */
        /* HINT: You might find at least the following instructions
         * useful:
         *  - _mm_setzero_ps
         *  - _mm_load_ps
         *  - _mm_hadd_ps
         *  - _mm_cvtss_f32
         *
         * HINT: You can create the sum of all elements in a vector
         * using two hadd instructions.
         */

        __m128 dummy=_mm_setzero_ps();
        for(int i=0;i<SIZE;++i){
            __m128 temp=_mm_setzero_ps();
            for(int j=0;j<SIZE;j+=4){

                __m128 mm_vec_b=_mm_load_ps((__m128*)(vec_b+j));
                __m128 mm_matr=_mm_load_ps((__m128*)(mat_a+MINDEX(i,j)));
                __m128 out=_mm_mul_ps(mm_vec_b,mm_matr);
                temp=_mm_add_ps(temp,out);

//                vec_c[i]+=_mm_cvtss_f32(_mm_dp_ps(mm_matr,mm_vec_b,0xf1));
            }
            __m128 res=_mm_hadd_ps(_mm_hadd_ps(temp,dummy),dummy);
            vec_c[i]=_mm_cvtss_f32(res);
        }

}
コード例 #4
0
ファイル: matvec.c プロジェクト: tierex/uppsala_university
/**
 * Allocate mat_a, vec_b, vec_c and vec_ref (aligned to
 * XMM_ALIGNMENT_BYTES) and initialize mat_a and vec_b with "random"
 * data. vec_c and vec_ref are cleared in full: this both gives the
 * accumulating reference implementation a zeroed vec_ref to start from
 * and writes to every element, so the kernel allocates physical memory
 * for the result vectors before we start doing benchmarking.
 *
 * Aborts the program if any allocation fails.
 */
static void
init(void)
{
        int i, j;

        mat_a = _mm_malloc(sizeof(*mat_a) * SIZE * SIZE, XMM_ALIGNMENT_BYTES);
        vec_b = _mm_malloc(sizeof(*vec_b) * SIZE, XMM_ALIGNMENT_BYTES);
        vec_c = _mm_malloc(sizeof(*vec_c) * SIZE, XMM_ALIGNMENT_BYTES);
        vec_ref = _mm_malloc(sizeof(*vec_ref) * SIZE, XMM_ALIGNMENT_BYTES);

        if (!mat_a || !vec_b || !vec_c || !vec_ref) {
                fprintf(stderr, "Memory allocation failed\n");
                abort();
        }

        for (i = 0; i < SIZE; i++) {
                for (j = 0; j < SIZE; j++)
                        mat_a[MINDEX(i, j)] = ((7 * i + j) & 0x0F) * 0x1P-2F;
                vec_b[i] = ((i * 17) & 0x0F) * 0x1P-2F;
        }

        /* vec_c and vec_ref are pointers, so sizeof(vec_c) would only be
         * the size of the pointer itself; clear all SIZE elements. */
        memset(vec_c, 0, sizeof(*vec_c) * SIZE);
        memset(vec_ref, 0, sizeof(*vec_ref) * SIZE);
}