/* * bool testWithLength(int length) * * Creates three random vectors with the given length and times the performance of scalar_fma() and * vectorized_fma() with these vectors as input. * * Returns true if the results match. */ bool testWithLength(int length) { printf("Test length: %d\n", length); // Create (and clone) three vectors with the given length struct doubleVector a = random_vector(length); struct doubleVector b = random_vector(length); struct doubleVector c = random_vector(length); struct doubleVector a2 = vector_clone(&a); struct doubleVector b2 = vector_clone(&b); struct doubleVector c2 = vector_clone(&c); // Test scalar performance printf("\tScalar cycle count: %d\n", perfTest(&a, &b, &c, scalar_fma)); // Test vector performance printf("\tVector cycle count: %d\n", perfTest(&a2, &b2, &c2, vector_fma)); // Compare results bool correct = vector_compare(&a, &a2); printf("\t%s\n", correct ? "MATCH" : "NO MATCH"); // Free dynamic resources free(a.data); free(b.data); free(c.data); free(a2.data); free(b2.data); free(c2.data); return correct; }
static int qsort_rows_compar(void* qsort_man, void* pq1, void* pq2) { qsort_man_t* qm = (qsort_man_t*)qsort_man; numint_t* q1 = *((numint_t**)pq1); numint_t* q2 = *((numint_t**)pq2); return vector_compare(qm->pk,q1,q2,qm->size); }
/* Compare rows l1 and l2 of mat over all mat->nbcolumns columns.
   Returns the result of vector_compare() on the two rows. */
int matrix_compare_rows(pk_internal_t* pk, matrix_t* mat, size_t l1, size_t l2)
{
  numint_t* first = mat->p[l1];
  numint_t* second = mat->p[l2];

  return vector_compare(pk, first, second, mat->nbcolumns);
}
/* Comparison callback for sorting an array of qsort_t entries.
   qsort_man is a qsort_man_t supplying the pk handle and the row size;
   q1 and q2 point to qsort_t entries whose p field is the row to compare.
   Returns whatever vector_compare() returns for the two rows. */
static int qsort_rows_with_sat_compar(void* qsort_man, void* q1, void* q2)
{
  qsort_man_t* man = (qsort_man_t*)qsort_man;
  const qsort_t* lhs = (const qsort_t*)q1;
  const qsort_t* rhs = (const qsort_t*)q2;

  return vector_compare(man->pk, lhs->p, rhs->p, man->size);
}
void test_eigen(const std::string& fn, bool is_symm) { std::cout << "Reading..." << "\n"; std::size_t sz; // read file std::fstream f(fn.c_str(), std::fstream::in); //read size of input matrix read_matrix_size(f, sz); bool is_row = viennacl::is_row_major<MatrixLayout>::value; if (is_row) std::cout << "Testing row-major matrix of size " << sz << "-by-" << sz << std::endl; else std::cout << "Testing column-major matrix of size " << sz << "-by-" << sz << std::endl; viennacl::matrix<ScalarType> A_input(sz, sz), A_ref(sz, sz), Q(sz, sz); // reference vector with reference values from file std::vector<ScalarType> eigen_ref_re(sz); // calculated real eigenvalues std::vector<ScalarType> eigen_re(sz); // calculated im. eigenvalues std::vector<ScalarType> eigen_im(sz); // read input matrix from file read_matrix_body(f, A_input); // read reference eigenvalues from file read_vector_body(f, eigen_ref_re); f.close(); A_ref = A_input; std::cout << "Calculation..." << "\n"; Timer timer; timer.start(); // Start the calculation if(is_symm) viennacl::linalg::qr_method_sym(A_input, Q, eigen_re); else viennacl::linalg::qr_method_nsm(A_input, Q, eigen_re, eigen_im); /* std::cout << "\n\n Matrix A: \n\n"; matrix_print(A_input); std::cout << "\n\n"; std::cout << "\n\n Matrix Q: \n\n"; matrix_print(Q); std::cout << "\n\n"; */ double time_spend = timer.get(); std::cout << "Verification..." 
<< "\n"; bool is_hessenberg = check_hessenberg(A_input); bool is_tridiag = check_tridiag(A_input); ublas::matrix<ScalarType> A_ref_ublas(sz, sz), A_input_ublas(sz, sz), Q_ublas(sz, sz), result1(sz, sz), result2(sz, sz); viennacl::copy(A_ref, A_ref_ublas); viennacl::copy(A_input, A_input_ublas); viennacl::copy(Q, Q_ublas); // compute result1 = ublas::prod(Q_ublas, A_input_ublas); (terribly slow when using ublas directly) for (std::size_t i=0; i<result1.size1(); ++i) for (std::size_t j=0; j<result1.size2(); ++j) { ScalarType value = 0; for (std::size_t k=0; k<Q_ublas.size2(); ++k) value += Q_ublas(i, k) * A_input_ublas(k, j); result1(i,j) = value; } // compute result2 = ublas::prod(A_ref_ublas, Q_ublas); (terribly slow when using ublas directly) for (std::size_t i=0; i<result2.size1(); ++i) for (std::size_t j=0; j<result2.size2(); ++j) { ScalarType value = 0; for (std::size_t k=0; k<A_ref_ublas.size2(); ++k) value += A_ref_ublas(i, k) * Q_ublas(k, j); result2(i,j) = value; } ScalarType prods_diff = matrix_compare(result1, result2); ScalarType eigen_diff = vector_compare(eigen_re, eigen_ref_re); bool is_ok = is_hessenberg; if(is_symm) is_ok = is_ok && is_tridiag; is_ok = is_ok && (eigen_diff < EPS); is_ok = is_ok && (prods_diff < EPS); // std::cout << A_ref << "\n"; // std::cout << A_input << "\n"; // std::cout << Q << "\n"; // std::cout << eigen_re << "\n"; // std::cout << eigen_im << "\n"; // std::cout << eigen_ref_re << "\n"; // std::cout << eigen_ref_im << "\n"; // std::cout << result1 << "\n"; // std::cout << result2 << "\n"; // std::cout << eigen_ref << "\n"; // std::cout << eigen << "\n"; printf("%6s [%dx%d] %40s time = %.4f\n", is_ok?"[[OK]]":"[FAIL]", (int)A_ref.size1(), (int)A_ref.size2(), fn.c_str(), time_spend); printf("tridiagonal = %d, hessenberg = %d prod-diff = %f eigen-diff = %f\n", is_tridiag, is_hessenberg, prods_diff, eigen_diff); std::cout << std::endl << std::endl; if (!is_ok) exit(EXIT_FAILURE); }
/*
  subdivide the triangles of the object once

  Every triangle of obj is replaced by four triangles built from its three
  corners and the three midpoints of its edges. Vertexes for which
  vector_compare() reports equality are merged, every remaining vertex is
  passed to vector_normalize(), and the result is handed back as a
  clone_Object() copy (which shrinks the arrays).

  Returns the new object, or NULL if a working buffer could not be
  allocated.

  The order of this algorithm is probably something like O(n^42) :)
  but I can't think of something smarter at the moment
*/
static Object *subdivide( Object *obj )
{
  /* corner indexes (relative to the first of the 6 vertexes of a source
     triangle) of the four triangles that replace it:
     0..2 are the original corners, 3..5 the midpoints of edges 0-1, 1-2, 2-0 */
  static const int corner[4][3] = {
    { 0, 3, 5 },
    { 3, 1, 4 },
    { 5, 4, 2 },
    { 3, 4, 5 }
  };

  int start, t, i, v, e, k;
  int index_cnt, index_found;
  int *index_list;
  Object *tmp;
  Object *ret;
  Object *c_ret;

  /* Exact worst case: each source triangle yields 6 vertexes and 4 triangles
     before duplicates are merged, and merging can only reduce the vertex
     count. At least one element is requested so that an empty object does
     not depend on what malloc(0) returns. */
  const size_t num_src = (size_t)obj->num_triangle;
  const size_t vertex_cap = (num_src > 0) ? 6*num_src : 1;
  const size_t triangle_cap = (num_src > 0) ? 4*num_src : 1;

  tmp = (Object *)malloc( sizeof(Object) );
  ret = (Object *)malloc( sizeof(Object) );
  /* a vertex can coincide with at most every vertex of tmp */
  index_list = (int *)malloc( vertex_cap*sizeof( int ) );

  if (tmp) {
    tmp->vertex = (Vector *)malloc( vertex_cap*sizeof( Vector ) );
    tmp->triangle = (Triangle *)malloc( triangle_cap*sizeof( Triangle ) );
    tmp->num_vertex = 0;
    tmp->num_triangle = 0;
  }
  if (ret) {
    ret->vertex = (Vector *)malloc( vertex_cap*sizeof( Vector ) );
    ret->triangle = (Triangle *)malloc( triangle_cap*sizeof( Triangle ) );
    ret->num_vertex = 0;
    ret->num_triangle = 0;
  }

  /* bail out cleanly if any of the working buffers is missing */
  if (!tmp || !ret || !index_list
      || !tmp->vertex || !tmp->triangle
      || !ret->vertex || !ret->triangle) {
    if (tmp) {
      free( tmp->vertex );
      free( tmp->triangle );
      free( tmp );
    }
    if (ret) {
      free( ret->vertex );
      free( ret->triangle );
      free( ret );
    }
    free( index_list );
    return NULL;
  }

#ifdef PRINT_STAT
  fprintf( stderr, "in v=%d t=%d\n", obj->num_vertex, obj->num_triangle );
#endif

  /* for each triangle create 3 new vertexes and the 4 corresponding triangles */
  for (t=0; t<obj->num_triangle; ++t) {
    /* copy the three original vertexes */
    for (i=0; i<3; ++i) {
      tmp->vertex[tmp->num_vertex++] = obj->vertex[obj->triangle[t].i[i]];
    }

    /* create 3 new: the midpoints of the edges 0-1, 1-2 and 2-0 */
    for (e=0; e<3; ++e) {
      tmp->vertex[tmp->num_vertex] = obj->vertex[obj->triangle[t].i[e]];
      vector_add( &tmp->vertex[tmp->num_vertex], &obj->vertex[obj->triangle[t].i[(e+1)%3]] );
      vector_mul( &tmp->vertex[tmp->num_vertex++], 0.5 );
    }

    /* create triangles */
    start = tmp->num_vertex-6;
    for (k=0; k<4; ++k) {
      tmp->triangle[tmp->num_triangle].i[0] = start+corner[k][0];
      tmp->triangle[tmp->num_triangle].i[1] = start+corner[k][1];
      tmp->triangle[tmp->num_triangle++].i[2] = start+corner[k][2];
    }
  }

  /* compress object eliminating double vertexes (welcome to the not so smart section) */

  /* copy original triangle list */
  for (t=0; t<tmp->num_triangle; ++t) {
    ret->triangle[t] = tmp->triangle[t];
  }
  ret->num_triangle = tmp->num_triangle;

  /* copy unique vertexes and correct triangle list */
  for (v=0; v<tmp->num_vertex; ++v) {
    /* create list of vertexes that are the same */
    index_cnt = 0;
    for (i=0; i<tmp->num_vertex; ++i) {
      /* check if i and v are the same
         first in the list is the smallest index */
      if (vector_compare( &tmp->vertex[v], &tmp->vertex[i] )) {
        index_list[index_cnt++] = i;
      }
    }

    /* check if vertex unknown so far */
    index_found = 0;
    for (i=0; i<ret->num_vertex; ++i) {
      if (vector_compare( &ret->vertex[i], &tmp->vertex[index_list[0]] )) {
        index_found = 1;
        break;
      }
    }

    if (!index_found) {
      ret->vertex[ret->num_vertex] = tmp->vertex[index_list[0]];

      /* correct triangles
         (we add an offset to the index, so we can tell them apart) */
      for (t=0; t<ret->num_triangle; ++t) {
        for (i=0; i<index_cnt; ++i) {
          if (ret->triangle[t].i[0] == index_list[i]) {
            ret->triangle[t].i[0] = ret->num_vertex+INDEX_OFFSET;
          }
          if (ret->triangle[t].i[1] == index_list[i]) {
            ret->triangle[t].i[1] = ret->num_vertex+INDEX_OFFSET;
          }
          if (ret->triangle[t].i[2] == index_list[i]) {
            ret->triangle[t].i[2] = ret->num_vertex+INDEX_OFFSET;
          }
        }
      }
      ret->num_vertex++;
    }
  }

  free( index_list );
  free_Object( tmp );

  /* correct index offset */
  for (t=0; t<ret->num_triangle; ++t) {
    ret->triangle[t].i[0] -= INDEX_OFFSET;
    ret->triangle[t].i[1] -= INDEX_OFFSET;
    ret->triangle[t].i[2] -= INDEX_OFFSET;
  }

  /* normalize vertexes */
  for (v=0; v<ret->num_vertex; ++v) {
    vector_normalize( &ret->vertex[v] );
  }

#ifdef PRINT_STAT
  fprintf( stderr, "out v=%d t=%d\n", ret->num_vertex, ret->num_triangle );
#endif

  /* shrink the arrays by cloning */
  c_ret = clone_Object( ret );
  free_Object( ret );

  return c_ret;
}
/* Merge the rows of matb into mata, in place.

   Both matrices must have the same number of columns and be flagged as
   sorted (asserted). mata is enlarged with matrix_resize_rows_lazy(), the
   coefficients of matb are copied behind its own rows, and the row pointers
   of mata are then rearranged by a two-way merge driven by vector_compare().
   When two rows compare equal, the one coming from matb is parked at the end
   of the pointer array and mata->nbrows is reduced accordingly.
   mata is flagged as sorted on return. */
void matrix_merge_sort_with(pk_internal_t* pk, matrix_t* mata, matrix_t* matb)
{
  size_t row, col;
  size_t out, left, right, dup;
  size_t total, first_b, ncols;
  numint_t** rows;

  assert (mata->nbcolumns == matb->nbcolumns);
  assert (mata->_sorted && matb->_sorted);

  first_b = mata->nbrows;
  ncols = mata->nbcolumns;
  matrix_resize_rows_lazy(mata, first_b + matb->nbrows);

  /* append the coefficients of matb after the rows of mata */
  for (row=0; row<matb->nbrows; row++) {
    for (col=0; col<ncols; col++) {
      numint_set(mata->p[first_b+row][col],matb->p[row][col]);
    }
  }

  /* snapshot of the row pointers in their unmerged order */
  total = first_b + matb->nbrows;
  rows = malloc(total*sizeof(numint_t*));
  for (row=0; row<total; row++) {
    rows[row] = mata->p[row];
  }

  /* merge the two sorted runs [0,first_b) and [first_b,total) back
     into mata->p */
  left = 0;
  right = first_b;
  out = 0;
  dup = 0;
  while (left < first_b && right < total) {
    int res = vector_compare(pk, rows[left], rows[right], ncols);
    if (res > 0) {
      mata->p[out] = rows[right];
      right++;
    }
    else {
      mata->p[out] = rows[left];
      left++;
      if (res == 0) {
        /* equal rows: park the second one at the end */
        dup++;
        mata->p[total-dup] = rows[right];
        right++;
      }
    }
    out++;
  }

  /* whatever is left in either run */
  for ( ; left < first_b; left++, out++) {
    mata->p[out] = rows[left];
  }
  for ( ; right < total; right++, out++) {
    mata->p[out] = rows[right];
  }

  mata->nbrows -= dup;
  mata->_sorted = true;
  free(rows);
}
/* Build a new matrix holding the merged rows of mata and matb.

   Both matrices must have the same number of columns (asserted).
   - If either input is not flagged as sorted, the result is
     matrix_append(mata,matb) passed through matrix_sort_rows().
   - Otherwise a matrix with room for all rows is allocated and filled by a
     two-way merge driven by vector_compare(); of two rows comparing equal
     only the one from mata is copied. The unused trailing rows are
     initialized with numint_init() and nbrows is set to the number of rows
     actually filled.
   Returns the new matrix. */
matrix_t* matrix_merge_sort(pk_internal_t* pk, matrix_t* mata, matrix_t* matb)
{
  matrix_t* merged;
  size_t out, left, right, col;
  size_t used;

  assert (mata->nbcolumns == matb->nbcolumns);

  if (!(mata->_sorted && matb->_sorted)) {
    merged = matrix_append(mata,matb);
    matrix_sort_rows(pk,merged);
    return merged;
  }

  merged = _matrix_alloc_int(mata->nbrows+matb->nbrows,mata->nbcolumns,true);
  out = 0;
  left = 0;
  right = 0;

  /* both inputs still have rows */
  while (left < mata->nbrows && right < matb->nbrows) {
    int res = vector_compare(pk, mata->p[left], matb->p[right], merged->nbcolumns);
    if (res > 0) {
      for (col=0; col<merged->nbcolumns; col++) {
        numint_init_set(merged->p[out][col],matb->p[right][col]);
      }
      right++;
    }
    else {
      for (col=0; col<merged->nbcolumns; col++) {
        numint_init_set(merged->p[out][col],mata->p[left][col]);
      }
      left++;
      /* equal rows: skip the one of matb */
      if (res == 0) {
        right++;
      }
    }
    out++;
  }

  /* at most one of the two loops below has anything left to copy */
  while (left < mata->nbrows) {
    for (col=0; col<merged->nbcolumns; col++) {
      numint_init_set(merged->p[out][col],mata->p[left][col]);
    }
    left++;
    out++;
  }
  while (right < matb->nbrows) {
    for (col=0; col<merged->nbcolumns; col++) {
      numint_init_set(merged->p[out][col],matb->p[right][col]);
    }
    right++;
    out++;
  }

  used = out;

  /* initialize the rows that stayed unused */
  for ( ; out<merged->nbrows; out++) {
    for (col=0; col<merged->nbcolumns; col++) {
      numint_init(merged->p[out][col]);
    }
  }
  merged->nbrows = used;

  return merged;
}