示例#1
0
/* Print elements 0..3 of v on one line, tab-separated, each formatted
   with %4.3f and followed by a newline. */
void printvec(vector4double v)
{
    const double e0 = vec_extract(v, 0),
                 e1 = vec_extract(v, 1),
                 e2 = vec_extract(v, 2),
                 e3 = vec_extract(v, 3);

    printf("%4.3f\t%4.3f\t%4.3f\t%4.3f\n", e0, e1, e2, e3);
}
示例#2
0
文件: extract-vsx.c 项目: 0day-ci/gcc
/* Check element access on two-element vectors, once through vec_extract
   and once through the [] subscript operator.  Each label passed to
   check () names the access form, the vector, and the index that is read.  */
static void test()
{
  vector long long vl = {0, 1};
  vector double vd = {0.0, 1.0};

  check (vec_extract (vl, 0) == 0, "vec_extract, vl, 0");
  check (vec_extract (vd, 1) == 1.0, "vec_extract, vd, 1");
  check (vl[0] == 0, "[], vl, 0");
  /* The label names index 1, the element this check actually reads.  */
  check (vd[1] == 1.0, "[], vd, 1");
}
/* Return element 3 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
unsigned int
extract_uint_3 (vector unsigned int a)
{
  int lane = 3;

  return vec_extract (a, lane);
}
/* Return element 7 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
short
extract_short_7 (vector short a)
{
  int lane = 7;

  return vec_extract (a, lane);
}
/* Read element 3 of V as an int (index supplied through a local
   variable) and return it converted to TYPE.  TYPE is defined outside
   this block.  */
TYPE
foo_3s (vector int v)
{
  int idx = 3;
  int elem = vec_extract (v, idx);

  return (TYPE) elem;
}
/* Return element 0 of the bool-int vector A as an unsigned int.  The
   index is passed to vec_extract through a local variable.  */
unsigned int
extract_bool_int_0 (vector bool int a)
{
  int lane = 0;

  return vec_extract (a, lane);
}
/* Return element 0 of the bool-short vector A as an unsigned short.  The
   index is passed to vec_extract through a local variable.  */
unsigned short int
extract_bool_short_int_0 (vector bool short int a)
{
  int lane = 0;

  return vec_extract (a, lane);
}
/* Read element 3 of V as an unsigned int (index supplied through a local
   variable) and return it converted to TYPE.  TYPE is defined outside
   this block.  */
TYPE
foo_3u (vector unsigned int v)
{
  int idx = 3;
  unsigned int elem = vec_extract (v, idx);

  return (TYPE) elem;
}
/* Return element 0 of the bool-char vector A as an unsigned char.  The
   index is passed to vec_extract through a local variable.  */
unsigned char
extract_bool_char_0 (vector bool char a)
{
  int lane = 0;

  return vec_extract (a, lane);
}
/* Return element 15 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
signed char
extract_schar_15 (vector signed char a)
{
  int lane = 15;

  return vec_extract (a, lane);
}
/* Return element 0 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
unsigned char
extract_uchar_0 (vector unsigned char a)
{
  int lane = 0;

  return vec_extract (a, lane);
}
/* Return element 3 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
int
extract_int_3 (vector int a)
{
  int lane = 3;

  return vec_extract (a, lane);
}
/* Return element 7 of A.  The index is passed to vec_extract through a
   local variable rather than as a literal.  */
unsigned short
extract_ushort_7 (vector unsigned short a)
{
  int lane = 7;

  return vec_extract (a, lane);
}
示例#14
0
inline int v_signmask(const v_int32x4& a)
{
    static const vec_uint4 slm = {0, 1, 2, 3};
    vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
    sv = vec_sl(sv, slm);
    sv = vec_sums(sv, vec_int4_z);
    return vec_extract(sv, 3);
}
示例#15
0
/* Return element i of a as a double.
   With USE_IBM defined the value comes from vec_extract; otherwise the
   storage of a is read as an array of double.  i is not range-checked. */
double _SIMD_extract_pd(__SIMDd a, int32_t i)
{
#ifdef USE_IBM
  return vec_extract(a,i);
#else
  double *lanes = (double*)&a;
  return lanes[i];
#endif
}
示例#16
0
// Extract one scalar from a SIMD operand: return element i of a as a float.
// With USE_IBM defined the value comes from vec_extract; otherwise the
// storage of a is read as an array of float.  i is not range-checked.
float _SIMD_extract_ps(__SIMD a, int32_t i)
{
#ifdef USE_IBM
  return vec_extract(a,i);
#else
  float *lanes = (float*)&a;
  return lanes[i];
#endif
}
示例#17
0
/* Return element i of a as an int32_t.
   With USE_IBM defined the value comes from vec_extract; otherwise the
   storage of a is read as an array of int32_t.  i is not range-checked. */
int32_t _SIMD_extract_epi32(__SIMDi a, int32_t i)
{
#ifdef USE_IBM
  return vec_extract(a,i);
#else
  int32_t *lanes = (int32_t*)&a;
  return lanes[i];
#endif
}
示例#18
0
/* For each element type, load through vec_lde at a byte offset of
   (element index * element size) and check the element at that same
   index of the result.  The source arrays sv* are defined outside this
   block; presumably init () fills them -- confirm.  */
static void test ()
{
  init ();

  /* Loads: the first argument is a byte offset into the source array.  */
  vector unsigned char vuc = vec_lde (9*1, (unsigned char *)svuc);
  vector signed char vsc = vec_lde (14*1, (signed char *)svsc);
  vector unsigned short vus = vec_lde (7*2, (unsigned short *)svus);
  vector signed short vss = vec_lde (1*2, (signed short *)svss);
  vector unsigned int vui = vec_lde (3*4, (unsigned int *)svui);
  vector signed int vsi = vec_lde (2*4, (signed int *)svsi);
  vector float vf = vec_lde (0*4, (float *)svf);

  /* Checks: one element per vector, at the index that was loaded.  */
  check (vec_extract (vuc, 9) == 9, "vuc");
  check (vec_extract (vsc, 14) == 6, "vsc");
  check (vec_extract (vus, 7) == 7, "vus");
  check (vec_extract (vss, 1) == -3, "vss");
  check (vec_extract (vui, 3) == 3, "vui");
  check (vec_extract (vsi, 2) == 0, "vsi");
  check (vec_extract (vf,  0) == 0.0, "vf");
}
示例#19
0
/* Argument-count diagnostics for vec_promote, vec_extract, vec_splats and
   vec_insert.  Every call below that carries a dg-error directive is
   written with the wrong number of arguments and is expected to be
   rejected with the quoted message.
   NOTE(review): dg-error directives are matched against the source line
   they sit on (DejaGnu convention), so each directive has to stay on the
   same line as its call -- do not reflow these lines.  */
int main(int argc, char **argv)
{
vector float t;
    vec_promote();                      /* { dg-error "vec_promote only accepts 2" } */
    vec_promote(1.0f);                  /* { dg-error "vec_promote only accepts 2" } */
    vec_promote(1.0f, 2, 3);            /* { dg-error "vec_promote only accepts 2" } */
    vec_extract ();                     /* { dg-error "vec_extract only accepts 2" } */
    vec_extract (t);                    /* { dg-error "vec_extract only accepts 2" } */
    /* Two arguments: the only call in this function without a dg-error
       directive, so no diagnostic is expected for it.  */
    vec_extract (t, 2);
    vec_extract (t, 2, 5, 6);           /* { dg-error "vec_extract only accepts 2" } */
    vec_splats ();                      /* { dg-error "vec_splats only accepts 1" } */
    vec_splats (t, 3);                  /* { dg-error "vec_splats only accepts 1" } */
    vec_insert ();                      /* { dg-error "vec_insert only accepts 3" } */
    vec_insert (t);                     /* { dg-error "vec_insert only accepts 3" } */
    vec_insert (t, 3);                  /* { dg-error "vec_insert only accepts 3" } */
    vec_insert (t, 3, 2, 4, 6, 6);      /* { dg-error "vec_insert only accepts 3" } */
    return 0;
}  
示例#20
0
inline int v_signmask(const v_int16x8& a)
{
    static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
    vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
    sv = vec_sl(sv, slm);
    vec_int4 svi = vec_int4_z;
    svi = vec_sums(vec_sum4s(sv, svi), svi);
    return vec_extract(svi, 3);
}
示例#21
0
/** Mask **/
inline int v_signmask(const v_uint8x16& a)
{
    vec_uchar16 sv  = vec_sr(a.val, vec_uchar16_sp(7));
    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
    sv = vec_sl(sv, slm);
    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
    static const vec_uint4 slm4 = {0, 0, 8, 8};
    sv4 = vec_sl(sv4, slm4);
    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
}
示例#22
0
static void test()
{
  vector signed int va = {-7,11,-13,17};

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  vector signed int vb = {128,0,0,0};
#else
  vector signed int vb = {0,0,0,128};
#endif

  vector signed int vd = vec_sums (va, vb);
  signed int r = vec_extract (vd, 3);

  check (r == 136, "sums");
}
示例#23
0
文件: swaps-p8-14.c 项目: Alexpux/GCC
/* For every pair of elements (i advances by 2 up to N), load one vector
   from each of cb, cc and cd at index i, compute (vb + vc - vd) shifted
   right with vec_sra by the amounts in `threes`, copy element 0 of the
   result into x, and store the whole result to ca at index i.
   N, x, ca, cb, cc and cd are defined outside this block.
   The function is marked noinline.  */
__attribute__((noinline)) void foo ()
{
  int i;
  /* NOTE(review): va is declared but never used in this function.  */
  vector long long va, vb, vc, vd, tmp;
  /* volatile: the shift count is read from memory at run time instead of
     being treated as a known constant.  */
  volatile unsigned long long three = 3;
  /* Shift-count vector built from the value of `three`.  */
  vector unsigned long long threes = vec_splats (three);
  for (i = 0; i < N; i+=2) {
    vb = vec_vsx_ld (0, (vector long long *)&cb[i]);
    vc = vec_vsx_ld (0, (vector long long *)&cc[i]);
    vd = vec_vsx_ld (0, (vector long long *)&cd[i]);
    tmp = vec_add (vb, vc);
    tmp = vec_sub (tmp, vd);
    tmp = vec_sra (tmp, threes);
    /* x is overwritten on every iteration, so after the loop it holds the
       element-0 value from the last iteration.  */
    x = vec_extract (tmp, 0);
    vec_vsx_st (tmp, 0, (vector long long *)&ca[i]);
  }
}
示例#24
0
/*
 * Greedy atom-selection solver (Orthogonal Matching Pursuit per the inline
 * comments) that maintains an incrementally updated Cholesky factor.
 *
 * For each of the S signals the loop repeatedly picks the atom with the
 * largest absolute entry in h, appends it to the sub-dictionary, updates the
 * Cholesky factor, solves for the coefficients, and recomputes the residual,
 * until K atoms are selected, the squared residual norm drops to
 * res_norm_bnd^2 or below, or one of the early-exit conditions fires.
 *
 * Parameters:
 *   m_dict        Dictionary; atom j is read as the M values starting at
 *                 m_dict + j*M, and N atoms are used.
 *   m_x           Signals; signal s is read as the M values starting at
 *                 m_x + M*s.
 *   M, N, S       Signal length, number of atoms, number of signals.
 *   K             Maximum number of atoms per signal; replaced by M when it
 *                 is larger than M.
 *   res_norm_bnd  Residual-norm stopping bound (compared in squared form).
 *   sparse_output 0 selects a full N x S double matrix; any other value
 *                 selects a sparse N x S matrix created with room for
 *                 max_cols*S non-zeros.
 *   verbose       Non-zero enables the diagnostic message and the profile
 *                 printout.
 *
 * Returns the coefficient matrix created with mxCreateDoubleMatrix or
 * mxCreateSparse.  All scratch buffers allocated here are released with
 * mxFree before returning.
 *
 * NOTE(review): allocation results from mxMalloc are not checked here.
 * NOTE(review): v_t2 is allocated and freed but never otherwise used, and d1
 * is declared but never used, in this function.
 * NOTE(review): the counters i, j, k, s are int while M, N, S, K are mwSize;
 * the comparisons between them mix the two types -- confirm that the sizes
 * in use fit in int.
 */
mxArray* omp_chol(const double m_dict[], 
    const double m_x[],
    mwSize M, 
    mwSize N,
    mwSize S,
    mwSize K, 
    double res_norm_bnd,
    int sparse_output,
    int verbose){

    // List of indices of selected atoms
    mwIndex *selected_atoms = 0; 
    // Simple binary mask of selected atoms
    int* selected_atoms_mask = 0;   
    // Storage for the Cholesky decomposition of D_I' D_I
    double *m_lt = 0;
    // The submatrix of selected atoms
    double* m_subdict = 0;
    // The proxy D' x
    double* v_proxy = 0;
    // The inner product of residual with atoms
    double* v_h = 0;
    // The residual
    double* v_r = 0;
    // b = D_I' d_k in the Cholesky decomposition updates
    double* v_b = 0;
    // New vector in the Cholesky decomposition updates
    double* v_w = 0;
    // Result of orthogonal projection LL' c = p_I
    double* v_c = 0;
    // Some temporary vectors
    double *v_t1 = 0, *v_t2 = 0;
    // Pointer to new atom
    const double* wv_new_atom;
    // residual norm squared
    double res_norm_sqr;
    // square of upper bound on residual norm
    double res_norm_bnd_sqr = SQR(res_norm_bnd);
    // Pointer to current signal
    const double *wv_x = 0;

    /// Output array
    mxArray* p_alpha;
    double* m_alpha;
    // row indices for non-zero entries in Alpha
    mwIndex *ir_alpha;
    // indices for first non-zero entry in column
    mwIndex *jc_alpha;
    /// Index for non-zero entries in alpha
    mwIndex nz_index;


    // counters
    int i, j , k, s;
    // index of new atom
    mwIndex new_atom_index;
    // misc variables 
    double d1, d2;

    // Maximum number of columns to be used in representations
    mwSize max_cols;

    // structure for tracking time spent.
    omp_profile profile;

    // NOTE(review): if mwSize is an unsigned type, the K < 0 half of this
    // test can never be true -- confirm against the mwSize definition in use.
    if (K < 0 || K > M) {
        // K cannot be greater than M.
        K = M;
    }
    // Start from ceil(sqrt(M)/2) + 1.01 truncated to mwSize, then raise to K
    // if K is larger, so max_cols is never below K.
    max_cols = (mwSize)(ceil(sqrt((double)M)/2.0) + 1.01);
    if(max_cols < K){
        max_cols = K;
    }
    // Memory allocations
    // Number of selected atoms cannot exceed M
    selected_atoms = (mwIndex*) mxMalloc(M*sizeof(mwIndex));
    // Total number of atoms is N
    selected_atoms_mask = (int*) mxMalloc(N*sizeof(int));
    // Number of rows in L cannot exceed M. Number of columns 
    // cannot exceed max_cols.
    m_lt = (double*) mxMalloc(M*max_cols*sizeof (double));
    // Number of entries in new line for L cannot exceed N.
    v_b = (double*)mxMalloc(N*sizeof(double));
    v_w = (double*)mxMalloc(N*sizeof(double));
    v_c = (double*)mxMalloc(M*sizeof(double));
    // Giving enough space for temporary vectors
    v_t1 = (double*)mxMalloc(N*sizeof(double));
    v_t2 = (double*)mxMalloc(N*sizeof(double));
    // Keeping max_cols space for subdictionary. 
    m_subdict = (double*)mxMalloc(max_cols*M*sizeof(double));
    // Proxy vector is in R^N
    v_proxy = (double*)mxMalloc(N*sizeof(double));
    // h is in R^N.
    v_h = (double*)mxMalloc(N*sizeof(double));
    // Residual is in signal space R^M.
    v_r = (double*)mxMalloc(M*sizeof(double));

    if (sparse_output == 0){
        // Full output: ir/jc are unused and nz_index is left unset.
        p_alpha = mxCreateDoubleMatrix(N, S, mxREAL);
        m_alpha =  mxGetPr(p_alpha);
        ir_alpha = 0;
        jc_alpha = 0;
    }else{
        // Sparse output with room for max_cols non-zeros per signal.
        p_alpha = mxCreateSparse(N, S, max_cols*S, mxREAL);
        m_alpha = mxGetPr(p_alpha);
        ir_alpha = mxGetIr(p_alpha);
        jc_alpha = mxGetJc(p_alpha);
        nz_index = 0;
        jc_alpha[0] = 0;
    }
    omp_profile_init(&profile);

    for(s=0; s<S; ++s){
        wv_x = m_x + M*s;
        // Initialization
        res_norm_sqr = inner_product(wv_x, wv_x, M);
        //Compute proxy p  = D' * x
        mult_mat_t_vec(1, m_dict, wv_x, v_proxy, M, N);
        omp_profile_toctic(&profile, TIME_DtR);
        // h = p = D' * r
        copy_vec_vec(v_proxy, v_h, N);
        // Clear the selection mask for this signal.
        for (i=0; i<N; ++i){
            selected_atoms_mask[i] = 0;
        }
        // Number of atoms selected so far.
        k = 0;
        // Iterate for each atom
        while (k < K &&  res_norm_sqr > res_norm_bnd_sqr){
            omp_profile_tic(&profile);
            // Pick the index of (k+1)-th atom
            new_atom_index = abs_max_index(v_h, N);
            omp_profile_toctic(&profile, TIME_MaxAbs);
            // If this atom is already selected, we will break
            if (selected_atoms_mask[new_atom_index]){
                // This is unlikely due to orthogonal structure of OMP
                if (verbose){
                    mexPrintf("This atom is already selected.");
                }
                break;
            }
            // Check for small values
            d2 = v_h[new_atom_index];
            if (SQR(d2) < 1e-14){
                // The inner product of residual with new atom is way too small.
                break;
            }
            // Store the index of new atom
            selected_atoms[k] = new_atom_index;
            selected_atoms_mask[new_atom_index] = 1;

            // Copy the new atom to the sub-dictionary
            wv_new_atom = m_dict + new_atom_index*M;
            copy_vec_vec(wv_new_atom, m_subdict+k*M, M);
            omp_profile_toctic(&profile, TIME_DictSubMatrixUpdate);

            // Cholesky update
            if (k == 0){
                // Simply initialize the L matrix
                // NOTE(review): a 1 here presumably assumes the atom has
                // unit norm -- confirm against how m_dict is prepared.
                *m_lt = 1;
            }else{
                // Incremental Cholesky decomposition
                // A non-zero return stops the selection for this signal; k is
                // not incremented, so the output below uses the previous k
                // coefficients in v_c.
                if (chol_update(m_subdict, wv_new_atom, m_lt, 
                    v_b, v_w, M, k) != 0){
                    break;
                }
            }
            omp_profile_toctic(&profile, TIME_LCholUpdate);
            // It is time to increase the count of selected atoms
            ++k;
            // We will now solve the equation L L' alpha_I = p_I
            // vec_extract here is this project's helper taking (source,
            // indices, destination, count), not the AltiVec built-in.
            vec_extract(v_proxy, selected_atoms, v_t1, k);
            spd_chol_lt_solve(m_lt, v_t1, v_c, M, k);
            omp_profile_toctic(&profile, TIME_LLtSolve);
            // Compute residual
            // r  = x - D_I c
            mult_mat_vec(-1, m_subdict, v_c, v_r, M, k);
            sum_vec_vec(1, wv_x, v_r, M);
            omp_profile_toctic(&profile, TIME_RUpdate);
            // Update h = D' r
            mult_mat_t_vec(1, m_dict, v_r, v_h, M, N);
            // Update residual norm squared
            res_norm_sqr = inner_product(v_r, v_r, M);
            omp_profile_toctic(&profile, TIME_DtR);
            //mexPrintf(".\n");
        }

        // Write the output vector
        if(sparse_output == 0){
            // Write the output vector
            double* wv_alpha =  m_alpha + N*s;
            fill_vec_sparse_vals(v_c, selected_atoms, wv_alpha, N, k);
        }
        else{
            // Sort the row indices
            quicksort_indices(selected_atoms, v_c, k);
            // add the non-zero entries for this column
            for(j=0; j <k; ++j){
                m_alpha[nz_index] = v_c[j];
                ir_alpha[nz_index] = selected_atoms[j];
                ++nz_index;
            }
            // fill in the total number of nonzero entries in the end.
            jc_alpha[s+1] = jc_alpha[s] + k;
        }
    }
    if(verbose){
        omp_profile_print(&profile);
    }

    // Memory cleanup
    mxFree(selected_atoms);
    mxFree(selected_atoms_mask);
    mxFree(m_lt);
    mxFree(v_b);
    mxFree(v_w);
    mxFree(v_c);
    mxFree(v_t1);
    mxFree(v_t2);
    mxFree(m_subdict);
    mxFree(v_proxy);
    mxFree(v_h);
    mxFree(v_r);

    // Return the result
    return p_alpha;
}
示例#25
0
/* Return element OFFSET of the vector produced by vec_vbpermq (a, b),
   converted to long.  OFFSET is not defined in this block; presumably it
   is a macro supplied by the including file -- confirm.  */
long foou (vector unsigned char a, vector unsigned char b)
{
  return vec_extract (vec_vbpermq (a, b), OFFSET);
}
示例#26
0
/* Read element 3 of V (literal index) as an unsigned int and return it
   converted to TYPE.  TYPE is defined outside this block.  */
TYPE
foo_3u (vector unsigned int v)
{
  unsigned int elem;

  elem = vec_extract (v, 3);
  return (TYPE) elem;
}
示例#27
0
/* Read element 2 of V (literal index) as an int and return it converted
   to TYPE.  TYPE is defined outside this block.  */
TYPE
foo_2s (vector int v)
{
  int elem;

  elem = vec_extract (v, 2);
  return (TYPE) elem;
}
示例#28
0
/* Return element OFFSET of V.  OFFSET is defined outside this block.  */
long
get_value (vector long v)
{
  long elem = vec_extract (v, OFFSET);

  return elem;
}
示例#29
0
/* Return element N of the vector that P points to.  N is a run-time
   index and is not range-checked here.  */
short
extract_hi_n_mem (vector short *p, int n)
{
  short elem = vec_extract (*p, n);

  return elem;
}
示例#30
0
/* Return element 0 of the vector that P points to.  */
double
get_value (vector double *p)
{
  double elem = vec_extract (*p, 0);

  return elem;
}