Ejemplo n.º 1
0
// 64-bit integer multiplication
int test_simd_mul_u64()
{
    int test_result = 0;
    const int alignment = SIMD_WIDTH_BYTES;

    // Integer 
    {
        const int num_elems = SIMD_STREAMS_64;
        const TEST_TYPES test_type = TEST_U64;
        unsigned long int *arr_A = NULL, *arr_B = NULL, *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(test_type, (void **)&arr_A, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_B, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_INT va = simd_load(arr_A);
        SIMD_INT vb = simd_load(arr_B);
        SIMD_INT vc = simd_mul_u64(va, vb);

        for (int i = 0; i < num_elems; ++i)
            arr_C2[i] = arr_A[i] * arr_B[i]; 

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_B);
        free(arr_C1);
        free(arr_C2);
    }

    return test_result;
}
Ejemplo n.º 2
0
// Convert unsigned 64-bit integer to 32/64-bit floating-point
int test_simd_cvt_u64_fp()
{
    int test_result = 0;
    const int alignment = SIMD_WIDTH_BYTES;

    // Float 
    {
        const int num_elems = SIMD_STREAMS_32;
        const TEST_TYPES test_type = TEST_FLT;
        unsigned long int *arr_A = NULL;
        float *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(TEST_U64, (void **)&arr_A, SIMD_STREAMS_64, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_INT va = simd_load(arr_A);
        SIMD_FLT vc = simd_cvt_u64_f32(va);

        for (int i = 0; i < num_elems/2; ++i)
            arr_C2[i] = (float)arr_A[i]; 
        for (int i = 0; i < num_elems/2; ++i)
            arr_C2[num_elems/2 + i] = 0.0; 

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_C1);
        free(arr_C2);
    }

    // Double 
    {
        const int num_elems = SIMD_STREAMS_64;
        const TEST_TYPES test_type = TEST_DBL;
        unsigned long int *arr_A = NULL;
        double *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(TEST_U64, (void **)&arr_A, SIMD_STREAMS_64, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_INT va = simd_load(arr_A);
        SIMD_DBL vc = simd_cvt_u64_f64(va);

        for (int i = 0; i < num_elems; ++i)
            arr_C2[i] = (double)arr_A[i]; 

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_C1);
        free(arr_C2);
    }

    return test_result;
}
Ejemplo n.º 3
0
// Aligned loads and stores
int test_simd_loadstore()
{
    int test_result = 0;
    const int alignment = SIMD_WIDTH_BYTES;

    // Integer
    {
        const int num_elems = SIMD_STREAMS_32;
        const TEST_TYPES test_type = TEST_I32;
        int *arr_A = NULL, *arr_B = NULL;

        create_test_array(test_type, (void **)&arr_A, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_B, num_elems, alignment);

        SIMD_INT va = simd_load(arr_A);

        simd_store(arr_B, va);
        test_result += validate_test_arrays(test_type, (void *)arr_A, (void *)arr_B, num_elems);

        free(arr_A);
        free(arr_B);
    }

    // Float 
    {
        const int num_elems = SIMD_STREAMS_32;
        const TEST_TYPES test_type = TEST_FLT;
        float *arr_A = NULL, *arr_B = NULL;

        create_test_array(test_type, (void **)&arr_A, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_B, num_elems, alignment);

        SIMD_FLT va = simd_load(arr_A);

        simd_store(arr_B, va);
        test_result += validate_test_arrays(test_type, (void *)arr_A, (void *)arr_B, num_elems);

        free(arr_A);
        free(arr_B);
    }

    return test_result;
}
Ejemplo n.º 4
0
// Pack and merge the low 32 bits of 64-bit integers.
// The reference result holds the low 32 bits of every element of A in its
// first half and the low 32 bits of every element of B in its second half;
// it is compared with the stored output of simd_packmerge_i32.
// Returns the accumulated value reported by validate_test_arrays.
int test_simd_packmerge_i32()
{
    int test_result = 0;
    const int alignment = SIMD_WIDTH_BYTES;

    // Integer
    {
        const int num_elems = SIMD_STREAMS_32;
        const int half = num_elems / 2;
        const TEST_TYPES test_type = TEST_I32;
        long int *arr_A = NULL, *arr_B = NULL;
        int *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(TEST_I64, (void **)&arr_A, SIMD_STREAMS_64, alignment);
        create_test_array(TEST_I64, (void **)&arr_B, SIMD_STREAMS_64, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_INT va = simd_load(arr_A);
        SIMD_INT vb = simd_load(arr_B);
        SIMD_INT vc = simd_packmerge_i32(va, vb);

        // A truncating conversion selects the low 32 bits of each 64-bit
        // element independent of byte order, and avoids reading `long int`
        // storage through an `int` pointer (a strict-aliasing violation).
        for (int i = 0; i < half; ++i)
            arr_C2[i] = (int)arr_A[i];
        for (int i = 0; i < half; ++i)
            arr_C2[half + i] = (int)arr_B[i];

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_B);
        free(arr_C1);
        free(arr_C2);
    }

    return test_result;
}
Ejemplo n.º 5
0
// Floating-point fused multiply-add
int test_simd_fmadd()
{
    int test_result = 0;
    const int alignment = SIMD_WIDTH_BYTES;

    // Float 
    {
        const int num_elems = SIMD_STREAMS_32;
        const TEST_TYPES test_type = TEST_FLT;
        float *arr_A = NULL, *arr_B = NULL, *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(test_type, (void **)&arr_A, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_B, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_FLT va = simd_load(arr_A);
        SIMD_FLT vb = simd_load(arr_B);
        SIMD_FLT vc = simd_load(arr_C1);
        vc = simd_fmadd(va, vb, vc);

        for (int i = 0; i < num_elems; ++i)
            arr_C2[i] = arr_A[i] * arr_B[i] + arr_C1[i]; 

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_B);
        free(arr_C1);
        free(arr_C2);
    }

    // Double 
    {
        const int num_elems = SIMD_STREAMS_64;
        const TEST_TYPES test_type = TEST_DBL;
        double *arr_A = NULL, *arr_B = NULL, *arr_C1 = NULL, *arr_C2 = NULL;

        create_test_array(test_type, (void **)&arr_A, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_B, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C1, num_elems, alignment);
        create_test_array(test_type, (void **)&arr_C2, num_elems, alignment);

        SIMD_DBL va = simd_load(arr_A);
        SIMD_DBL vb = simd_load(arr_B);
        SIMD_DBL vc = simd_load(arr_C1);
        vc = simd_fmadd(va, vb, vc);

        for (int i = 0; i < num_elems; ++i)
            arr_C2[i] = arr_A[i] * arr_B[i] + arr_C1[i]; 

        simd_store(arr_C1, vc);
        test_result += validate_test_arrays(test_type, (void *)arr_C1, (void *)arr_C2, num_elems);

        free(arr_A);
        free(arr_B);
        free(arr_C1);
        free(arr_C2);
    }

    return test_result;
}
Ejemplo n.º 6
0
/// Moves rays to their intersection with a surface, updating the ray
/// coordinates in place.
///
/// Only the SIMD implementation for Surface::Plane (the Z = 0 plane) is
/// present. Surface::Conic and the non-SIMD build are empty stubs, and rays
/// beyond the last full group of SIMD_VEC_LEN are left untouched.
///
/// @param surf_class  surface kind selecting the intersection formula
/// @param params      surface parameters (not read by the code in this function)
/// @param Nrays       number of rays in each of the per-ray arrays
/// @param X,Y,Z       ray coordinates; overwritten with X + cX*s, Y + cY*s and 0
///                    for the plane case, where s = -Z/cZ
/// @param cX,cY,cZ    per-ray direction components (read only)
/// @param N_bad       counter increased by the number of rays with s < 0
/// @param flag        per-ray flag; set to 0 for every ray with s < 0
/// @return a value-initialized RT_engine_error.
///         NOTE(review): the function previously had no return statement
///         (undefined behaviour); the zero value is assumed to mean success --
///         confirm against the definition of RT_engine_error.
RT_engine_error surface_intersection(Surface::SurfaceClass surf_class, real_t* params,
                                     size_t Nrays, real_t* X, real_t* Y, real_t* Z,
                                     real_t* cX, real_t* cY, real_t* cZ,
                                     size_t *N_bad, beam_flag_t* flag)
{
#ifdef USE_SIMD
    // Rays are processed in groups of SIMD_VEC_LEN; the remainder is not handled here.
    const size_t N_simd = Nrays - (Nrays % SIMD_VEC_LEN);
    real_t zero = 0.0;    // broadcast source for the new Z coordinate (read only)
    size_t bad_rays = 0;  // per-thread under the reduction, merged into *N_bad below

#ifdef USE_OPENMP
#pragma omp parallel for reduction(+:bad_rays)
#endif
    for (size_t i = 0; i < N_simd; i += SIMD_VEC_LEN) {

        switch ( surf_class ) {
            case Surface::Plane: { // trivial case of Z=0 plane
                // Every temporary is declared inside the loop body so that each
                // OpenMP thread works on its own copy instead of a shared one.
                real_t s[SIMD_VEC_LEN];

                simd_real_t simd_Z = simd_load(Z+i);
                simd_real_t simd_cZ = simd_load(cZ+i);
                simd_real_t simd_s = -simd_div(simd_Z,simd_cZ);   // s = -Z/cZ
                simd_Z = simd_broadcast_scalar(&zero);            // Z = 0
                simd_store(s,simd_s);

                for (int j = 0; j < SIMD_VEC_LEN; ++j) {
                    if (s[j] < 0) { // distance must be non-negative!
                        flag[i+j] = 0;
                        ++bad_rays;
                    }
                }

                simd_real_t simd_X = simd_load(X+i);
                simd_real_t simd_Y = simd_load(Y+i);
                simd_real_t simd_cX = simd_load(cX+i);
                simd_real_t simd_cY = simd_load(cY+i);

                simd_cX = simd_mul(simd_cX,simd_s); // cX*s
                simd_cY = simd_mul(simd_cY,simd_s); // cY*s

                simd_X = simd_add(simd_X,simd_cX); // X_new = X + cX*s
                simd_Y = simd_add(simd_Y,simd_cY); // Y_new = Y + cY*s

                simd_store(X+i,simd_X);
                simd_store(Y+i,simd_Y);
                simd_store(Z+i,simd_Z);
                break;
            }
            case Surface::Conic: { // not implemented
                break;
            }
        }
    }

    // Single update of the caller's counter after the parallel loop.
    *N_bad += bad_rays;
#else // generic non-SIMD realization
    // Not implemented: no ray is modified in this configuration.
#endif

    return RT_engine_error();
}