Пример #1
0
/*
 * bool testWithLength(int length)
 *
 * Creates three random vectors with the given length and times the performance of scalar_fma() and
 * vectorized_fma() with these vectors as input.
 *
 * Returns true if the results match.
 */
bool testWithLength(int length) {
  printf("Test length: %d\n", length);

  // Create (and clone) three vectors with the given length
  struct doubleVector a = random_vector(length);
  struct doubleVector b = random_vector(length);
  struct doubleVector c = random_vector(length);

  struct doubleVector a2 = vector_clone(&a);
  struct doubleVector b2 = vector_clone(&b);
  struct doubleVector c2 = vector_clone(&c);
  
 
  // Test scalar performance 
  printf("\tScalar cycle count: %d\n", perfTest(&a, &b, &c, scalar_fma));
 
  // Test vector performance
  printf("\tVector cycle count: %d\n", perfTest(&a2, &b2, &c2, vector_fma));

  // Compare results
  bool correct = vector_compare(&a, &a2);
  printf("\t%s\n", correct ? "MATCH" : "NO MATCH");
  
  // Free dynamic resources
  free(a.data);
  free(b.data);
  free(c.data);
  free(a2.data);
  free(b2.data);
  free(c2.data);

  return correct;
}
Пример #2
0
static int qsort_rows_compar(void* qsort_man, void* pq1, void* pq2)
{
    qsort_man_t* qm = (qsort_man_t*)qsort_man;
    numint_t* q1 = *((numint_t**)pq1);
    numint_t* q2 = *((numint_t**)pq2);
    return vector_compare(qm->pk,q1,q2,qm->size);
}
Пример #3
0
/* Compares rows l1 and l2 of `mat` over all of its columns.
 *
 * Returns the result of vector_compare() applied to the two rows.
 */
int matrix_compare_rows(pk_internal_t* pk,
                        matrix_t* mat, size_t l1, size_t l2)
{
    const size_t width = mat->nbcolumns;

    return vector_compare(pk, mat->p[l1], mat->p[l2], width);
}
Пример #4
0
/* Sort callback ordering two qsort_t entries by the row each one carries.
 *
 * qsort_man : sort context (a qsort_man_t) providing `pk` and the row `size`.
 * q1, q2    : the two qsort_t entries; only their `p` member is read here.
 *
 * Returns whatever vector_compare() yields for the two rows.
 */
static int qsort_rows_with_sat_compar(void* qsort_man, void* q1, void* q2)
{
    const qsort_t* const lhs = (const qsort_t*)q1;
    const qsort_t* const rhs = (const qsort_t*)q2;
    qsort_man_t* const ctx = (qsort_man_t*)qsort_man;

    return vector_compare(ctx->pk, lhs->p, rhs->p, ctx->size);
}
Пример #5
0
void test_eigen(const std::string& fn, bool is_symm)
{
    std::cout << "Reading..." << "\n";
    std::size_t sz;
    // read file
    std::fstream f(fn.c_str(), std::fstream::in);
    //read size of input matrix
    read_matrix_size(f, sz);

    bool is_row = viennacl::is_row_major<MatrixLayout>::value;
    if (is_row)
      std::cout << "Testing row-major matrix of size " << sz << "-by-" << sz << std::endl;
    else
      std::cout << "Testing column-major matrix of size " << sz << "-by-" << sz << std::endl;

    viennacl::matrix<ScalarType> A_input(sz, sz), A_ref(sz, sz), Q(sz, sz);
    // reference vector with reference values from file
    std::vector<ScalarType> eigen_ref_re(sz);
    // calculated real eigenvalues
    std::vector<ScalarType> eigen_re(sz);
    // calculated im. eigenvalues
    std::vector<ScalarType> eigen_im(sz);

    // read input matrix from file
    read_matrix_body(f, A_input);
    // read reference eigenvalues from file
    read_vector_body(f, eigen_ref_re);


    f.close();

    A_ref = A_input;

    std::cout << "Calculation..." << "\n";

    Timer timer;
    timer.start();
    // Start the calculation
    if(is_symm)
        viennacl::linalg::qr_method_sym(A_input, Q, eigen_re);
    else
        viennacl::linalg::qr_method_nsm(A_input, Q, eigen_re, eigen_im);
/*

    std::cout << "\n\n Matrix A: \n\n";
    matrix_print(A_input);
    std::cout << "\n\n";

    std::cout << "\n\n Matrix Q: \n\n";
    matrix_print(Q);
    std::cout << "\n\n";
*/

    double time_spend = timer.get();

    std::cout << "Verification..." << "\n";

    bool is_hessenberg = check_hessenberg(A_input);
    bool is_tridiag = check_tridiag(A_input);

    ublas::matrix<ScalarType> A_ref_ublas(sz, sz), A_input_ublas(sz, sz), Q_ublas(sz, sz), result1(sz, sz), result2(sz, sz);
    viennacl::copy(A_ref, A_ref_ublas);
    viennacl::copy(A_input, A_input_ublas);
    viennacl::copy(Q, Q_ublas);

    // compute result1 = ublas::prod(Q_ublas, A_input_ublas);   (terribly slow when using ublas directly)
    for (std::size_t i=0; i<result1.size1(); ++i)
      for (std::size_t j=0; j<result1.size2(); ++j)
      {
        ScalarType value = 0;
        for (std::size_t k=0; k<Q_ublas.size2(); ++k)
          value += Q_ublas(i, k) * A_input_ublas(k, j);
        result1(i,j) = value;
      }
    // compute result2 = ublas::prod(A_ref_ublas, Q_ublas);   (terribly slow when using ublas directly)
    for (std::size_t i=0; i<result2.size1(); ++i)
      for (std::size_t j=0; j<result2.size2(); ++j)
      {
        ScalarType value = 0;
        for (std::size_t k=0; k<A_ref_ublas.size2(); ++k)
          value += A_ref_ublas(i, k) * Q_ublas(k, j);
        result2(i,j) = value;
      }


    ScalarType prods_diff = matrix_compare(result1, result2);
    ScalarType eigen_diff = vector_compare(eigen_re, eigen_ref_re);


    bool is_ok = is_hessenberg;

    if(is_symm)
        is_ok = is_ok && is_tridiag;

    is_ok = is_ok && (eigen_diff < EPS);
    is_ok = is_ok && (prods_diff < EPS);

    // std::cout << A_ref << "\n";
    // std::cout << A_input << "\n";
    // std::cout << Q << "\n";
    // std::cout << eigen_re << "\n";
    // std::cout << eigen_im << "\n";
    // std::cout << eigen_ref_re << "\n";
    // std::cout << eigen_ref_im << "\n";

    // std::cout << result1 << "\n";
    // std::cout << result2 << "\n";
    // std::cout << eigen_ref << "\n";
    // std::cout << eigen << "\n";

    printf("%6s [%dx%d] %40s time = %.4f\n", is_ok?"[[OK]]":"[FAIL]", (int)A_ref.size1(), (int)A_ref.size2(), fn.c_str(), time_spend);
    printf("tridiagonal = %d, hessenberg = %d prod-diff = %f eigen-diff = %f\n", is_tridiag, is_hessenberg, prods_diff, eigen_diff);
    std::cout << std::endl << std::endl;

    if (!is_ok)
      exit(EXIT_FAILURE);

}
Пример #6
0
/* Subdivide every triangle of the object once.

   Each input triangle is replaced by four triangles built from its three
   corners and its three edge midpoints.  Duplicate vertexes produced this
   way are merged, every remaining vertex is passed to vector_normalize(),
   and a clone of the result (made with clone_Object()) is returned.

   obj : the object to subdivide; it is only read.

   Returns the new object, or NULL if a working buffer could not be
   allocated.

   The duplicate elimination is deliberately simple and far from linear:
   every vertex is compared against every other one.
*/
static Object *subdivide( Object *obj )
{
  int start, t, i, v;
  int *index_list;
  int index_cnt, index_found;
  size_t num_in_tri, vertex_cap, triangle_cap;
  Object *tmp, *ret, *c_ret;

  /* The worst case is known exactly: each input triangle contributes
     6 vertexes (3 corners + 3 midpoints) and 4 triangles, and the
     compressed object can never hold more than the uncompressed one.
     Capacities are kept at >= 1 so malloc() is never asked for 0 bytes.
  */
  num_in_tri = (obj->num_triangle > 0) ? (size_t)obj->num_triangle : 0;
  vertex_cap = 6*num_in_tri;
  triangle_cap = 4*num_in_tri;
  if (vertex_cap == 0) {
    vertex_cap = 1;
  }
  if (triangle_cap == 0) {
    triangle_cap = 1;
  }

  tmp = (Object *)malloc( sizeof(Object) );
  ret = (Object *)malloc( sizeof(Object) );
  /* a vertex can match at most every vertex in tmp */
  index_list = (int *)malloc( vertex_cap*sizeof( int ) );

  if (tmp != NULL) {
    tmp->vertex =
        (Vector *)malloc( vertex_cap*sizeof( Vector ) );
    tmp->triangle =
        (Triangle *)malloc( triangle_cap*sizeof( Triangle ) );
    tmp->num_vertex = 0;
    tmp->num_triangle = 0;
  }
  if (ret != NULL) {
    ret->vertex =
        (Vector *)malloc( vertex_cap*sizeof( Vector ) );
    ret->triangle =
        (Triangle *)malloc( triangle_cap*sizeof( Triangle ) );
    ret->num_vertex = 0;
    ret->num_triangle = 0;
  }

  if (tmp == NULL || ret == NULL || index_list == NULL ||
      tmp->vertex == NULL || tmp->triangle == NULL ||
      ret->vertex == NULL || ret->triangle == NULL) {
    /* Release piece by piece with free(): the objects are only partly
       built, and free() accepts NULL for the parts that are missing.
    */
    if (tmp != NULL) {
      free( tmp->vertex );
      free( tmp->triangle );
      free( tmp );
    }
    if (ret != NULL) {
      free( ret->vertex );
      free( ret->triangle );
      free( ret );
    }
    free( index_list );
    return NULL;
  }

#ifdef PRINT_STAT
  fprintf( stderr, "in v=%d t=%d\n", 
           obj->num_vertex, obj->num_triangle );
#endif
  /* for each triangle create 3 new vertexes and the 4 
     corresponding triangles 
  */
  for (t=0; t<obj->num_triangle; ++t) {
    /* copy the three original vertexes */
    for (i=0; i<3; ++i) {
      tmp->vertex[tmp->num_vertex++] =
        obj->vertex[obj->triangle[t].i[i]];
    }
    
    /* create 3 new ones: the midpoints of edges 0-1, 1-2 and 2-0 */
    tmp->vertex[tmp->num_vertex] = 
        obj->vertex[obj->triangle[t].i[0]];
    vector_add( &tmp->vertex[tmp->num_vertex],
                 &obj->vertex[obj->triangle[t].i[1]] );
    vector_mul( &tmp->vertex[tmp->num_vertex++], 0.5 ); 
    
    tmp->vertex[tmp->num_vertex] = 
        obj->vertex[obj->triangle[t].i[1]];
    vector_add( &tmp->vertex[tmp->num_vertex],
                 &obj->vertex[obj->triangle[t].i[2]] );
    vector_mul( &tmp->vertex[tmp->num_vertex++], 0.5 ); 
    
    tmp->vertex[tmp->num_vertex] = 
        obj->vertex[obj->triangle[t].i[2]];
    vector_add( &tmp->vertex[tmp->num_vertex],
                 &obj->vertex[obj->triangle[t].i[0]] );
    vector_mul( &tmp->vertex[tmp->num_vertex++], 0.5 ); 

    /* create triangles; start is the first of the 6 vertexes just added:
       start..start+2 are the corners, start+3..start+5 the midpoints
    */
    start = tmp->num_vertex-6;
    
    tmp->triangle[tmp->num_triangle].i[0] = start;
    tmp->triangle[tmp->num_triangle].i[1] = start+3;
    tmp->triangle[tmp->num_triangle++].i[2] = start+5;
      
    tmp->triangle[tmp->num_triangle].i[0] = start+3;
    tmp->triangle[tmp->num_triangle].i[1] = start+1;
    tmp->triangle[tmp->num_triangle++].i[2] = start+4;
      
    tmp->triangle[tmp->num_triangle].i[0] = start+5;
    tmp->triangle[tmp->num_triangle].i[1] = start+4;
    tmp->triangle[tmp->num_triangle++].i[2] = start+2;
      
    tmp->triangle[tmp->num_triangle].i[0] = start+3;
    tmp->triangle[tmp->num_triangle].i[1] = start+4;
    tmp->triangle[tmp->num_triangle++].i[2] = start+5;
  }
  
  /* compress object eliminating double vertexes */
  /* copy original triangle list */
  for (t=0; t<tmp->num_triangle; ++t) {
    ret->triangle[t] = tmp->triangle[t];
  }
  ret->num_triangle = tmp->num_triangle;
  
  /* copy unique vertexes and correct triangle list */
  for (v=0; v<tmp->num_vertex; ++v) {
    /* create list of vertexes that are the same */
    index_cnt = 0;
    for (i=0; i<tmp->num_vertex; ++i) {
      /* check if i and v are the same
         first in the list is the smallest index
      */
      if (vector_compare( &tmp->vertex[v], &tmp->vertex[i] )) {
        index_list[index_cnt++] = i;
      }
    }
    /* Should vector_compare() ever reject a vertex against itself, fall
       back to v alone so index_list[0] is always defined.
    */
    if (index_cnt == 0) {
      index_list[index_cnt++] = v;
    }
    
    /* check if vertex unknown so far */
    index_found = 0;
    for (i=0; i<ret->num_vertex; ++i) {
      if (vector_compare( &ret->vertex[i],
          &tmp->vertex[index_list[0]] )) {
        index_found = 1;
        break;
      }
    }
    
    if (!index_found) {
      ret->vertex[ret->num_vertex] = tmp->vertex[index_list[0]];
      
      /* correct triangles 
         (we add an offset to the index, so we can tell them apart)
         NOTE(review): this presumably relies on INDEX_OFFSET being larger
         than any index into tmp->vertex - confirm at its definition.
      */
      for (t=0; t<ret->num_triangle; ++t) {
        for (i=0; i<index_cnt; ++i) {
          if (ret->triangle[t].i[0] == index_list[i]) {
            ret->triangle[t].i[0] = ret->num_vertex+INDEX_OFFSET;
          }
          if (ret->triangle[t].i[1] == index_list[i]) {
            ret->triangle[t].i[1] = ret->num_vertex+INDEX_OFFSET;
          }
          if (ret->triangle[t].i[2] == index_list[i]) {
            ret->triangle[t].i[2] = ret->num_vertex+INDEX_OFFSET;
          }
        }
      }
      ret->num_vertex++;
    }
  }
  
  free( index_list );
  free_Object( tmp );
  
  /* correct index offset */
  for (t=0; t<ret->num_triangle; ++t) {
    ret->triangle[t].i[0] -= INDEX_OFFSET;
    ret->triangle[t].i[1] -= INDEX_OFFSET;
    ret->triangle[t].i[2] -= INDEX_OFFSET;
  }
  
  /* normalize vertexes */
  for (v=0; v<ret->num_vertex; ++v) {
    vector_normalize( &ret->vertex[v] );
  }
#ifdef PRINT_STAT
  fprintf( stderr, "out v=%d t=%d\n", 
           ret->num_vertex, ret->num_triangle );
#endif
  /* shrink the arrays by cloning */
  c_ret = clone_Object( ret );
  free_Object( ret );
  
  return c_ret;
}
Пример #7
0
/* Merges the rows of matb into mata, in place.
 *
 * Both matrices must have the same number of columns and be flagged as
 * sorted (both conditions are asserted).  mata is resized to hold the rows of
 * both, the coefficients of matb are copied behind the rows of mata, and the
 * row pointers of mata are then rearranged into merged order.  When a row of
 * mata and a row of matb compare equal, the matb copy is parked at the end of
 * the row array and mata->nbrows is reduced so that it is no longer counted.
 *
 * On return mata is flagged as sorted; matb is only read.
 */
void matrix_merge_sort_with(pk_internal_t* pk,
                            matrix_t* mata, matrix_t* matb)
{
    size_t row, col, out, next_a, next_b, dup_count;
    size_t total_rows, rows_a, ncols;
    numint_t** rows;

    assert (mata->nbcolumns == matb->nbcolumns);
    assert (mata->_sorted && matb->_sorted);

    rows_a = mata->nbrows;
    ncols = mata->nbcolumns;
    matrix_resize_rows_lazy(mata, rows_a + matb->nbrows);

    /* append the coefficients of matb behind the rows of mata */
    for (row = 0; row < matb->nbrows; row++) {
        for (col = 0; col < ncols; col++) {
            numint_set(mata->p[rows_a + row][col], matb->p[row][col]);
        }
    }

    /* snapshot of the row pointers: [0, rows_a) come from mata,
       [rows_a, total_rows) from matb */
    total_rows = rows_a + matb->nbrows;
    rows = (numint_t**)malloc(total_rows * sizeof(numint_t*));
    for (row = 0; row < total_rows; row++) {
        rows[row] = mata->p[row];
    }

    /* rebuild mata->p by merging the two sorted ranges of the snapshot */
    next_a = 0;
    next_b = rows_a;
    dup_count = 0;
    for (out = 0; next_a < rows_a && next_b < total_rows; out++) {
        const int order = vector_compare(pk, rows[next_a], rows[next_b], ncols);
        if (order > 0) {
            mata->p[out] = rows[next_b];
            next_b++;
        }
        else {
            mata->p[out] = rows[next_a];
            next_a++;
            if (order == 0) {
                /* same row on both sides: park the matb copy at the tail */
                dup_count++;
                mata->p[total_rows - dup_count] = rows[next_b];
                next_b++;
            }
        }
    }

    /* whichever range is not exhausted yet is appended unchanged */
    for (; next_a < rows_a; next_a++, out++) {
        mata->p[out] = rows[next_a];
    }
    for (; next_b < total_rows; next_b++, out++) {
        mata->p[out] = rows[next_b];
    }

    mata->nbrows -= dup_count;
    mata->_sorted = true;
    free(rows);
}
Пример #8
0
/* Builds a new matrix holding the merged rows of mata and matb.
 *
 * The two matrices must have the same number of columns (asserted).
 *
 * If either input is not flagged as sorted, the result is matrix_append() of
 * the two, passed through matrix_sort_rows().  Otherwise the rows are merged
 * directly; when a row of mata and a row of matb compare equal, only the mata
 * row is copied.  Rows of the result that stay unused are initialized with
 * numint_init() and excluded from the returned nbrows.
 *
 * Neither input is modified.  Returns the new matrix.
 */
matrix_t* matrix_merge_sort(pk_internal_t* pk,
                            matrix_t* mata, matrix_t* matb)
{
    size_t out, next_a, next_b, col, used_rows;
    matrix_t* merged;

    assert (mata->nbcolumns == matb->nbcolumns);

    /* general case: concatenate, then sort */
    if (!(mata->_sorted && matb->_sorted)) {
        merged = matrix_append(mata,matb);
        matrix_sort_rows(pk,merged);
        return merged;
    }

    /* both inputs sorted: linear merge into a fresh matrix */
    merged = _matrix_alloc_int(mata->nbrows+matb->nbrows,mata->nbcolumns,true);
    out = 0;
    next_a = 0;
    next_b = 0;
    while (next_a < mata->nbrows && next_b < matb->nbrows) {
        const int order = vector_compare(pk,
                                         mata->p[next_a],
                                         matb->p[next_b],
                                         merged->nbcolumns);
        if (order > 0) {
            for (col = 0; col < merged->nbcolumns; col++) {
                numint_init_set(merged->p[out][col],matb->p[next_b][col]);
            }
            next_b++;
        }
        else {
            for (col = 0; col < merged->nbcolumns; col++) {
                numint_init_set(merged->p[out][col],mata->p[next_a][col]);
            }
            next_a++;
            /* equal rows: the matb copy is skipped */
            if (order == 0) {
                next_b++;
            }
        }
        out++;
    }

    /* at most one of the two inputs still has rows left */
    for (; next_a < mata->nbrows; next_a++, out++) {
        for (col = 0; col < merged->nbcolumns; col++) {
            numint_init_set(merged->p[out][col],mata->p[next_a][col]);
        }
    }
    for (; next_b < matb->nbrows; next_b++, out++) {
        for (col = 0; col < merged->nbcolumns; col++) {
            numint_init_set(merged->p[out][col],matb->p[next_b][col]);
        }
    }

    used_rows = out;

    /* initialize the rows left over by skipped duplicates */
    for (; out < merged->nbrows; out++) {
        for (col = 0; col < merged->nbcolumns; col++) {
            numint_init(merged->p[out][col]);
        }
    }
    merged->nbrows = used_rows;

    return merged;
}