/*
 * Benchmark driver for matmul_optimize().
 *
 * Allocates 64-byte aligned buffers for both operands, the computed product
 * and the reference product, fills the operands and the reference through
 * read_matrix(TEST_FILENAME, ...), then times one call to matmul_optimize().
 *
 * Output: the elapsed time in microseconds, followed by " incorrect" when
 * compare_matrix() returns non-zero for the computed vs. reference product.
 *
 * Returns 1 when a buffer cannot be allocated; 0 otherwise (read errors are
 * reported on stdout and keep returning 0, as before).
 */
int main(int argc, char *argv[])
{
    double *matA = _mm_malloc(WIDTH*HEIGHT*sizeof(double), 64);
    double *matB = _mm_malloc((WIDTH*HEIGHT)*sizeof(double), 64);
    double *prod = _mm_malloc(WIDTH*HEIGHT*sizeof(double), 64);
    double *prod_ref = _mm_malloc(WIDTH*HEIGHT*sizeof(double), 64);
    int status = 0;

    if (!matA || !matB || !prod || !prod_ref) {
        /* Bail out before any buffer is handed to read_matrix(). */
        printf("Cannot allocate memory\n");
        status = 1;
    } else {
        int read_flag = read_matrix(TEST_FILENAME, prod_ref, matA, matB);

        if (read_flag == 1)
            printf("Cannot open test file\n");
        else if (read_flag == 2)
            printf("Error while reading data from test file");
        else if (read_flag == 3)
            printf("Error while closing the test file");

        if (!read_flag) {
            uint64_t start = timestamp_us();
            matmul_optimize(prod, matA, matB); /* run the optimization functions. */
            uint64_t elapsed_us = timestamp_us() - start;

            /* uint64_t is not necessarily unsigned long, so widen explicitly
             * instead of relying on %lu. */
            if (compare_matrix(prod, prod_ref))
                printf("%llu incorrect\n", (unsigned long long)elapsed_us);
            else
                printf("%llu\n", (unsigned long long)elapsed_us);
        }
    }

    /* Single release point shared by the success and error paths; only
     * pointers that were actually obtained are released. */
    if (prod_ref)
        _mm_free(prod_ref);
    if (prod)
        _mm_free(prod);
    if (matB)
        _mm_free(matB);
    if (matA)
        _mm_free(matA);

    return status;
}
int main(int argc, char** argv) { arg_size = 1024; if(argc > 1) arg_size = atoi(argv[1]); arg_cutoff_value = 64; if(argc > 2) arg_cutoff_value = atoi(argv[2]); if((arg_size & (arg_size - 1)) != 0 || (arg_size % 16) != 0) inncabs::error("Error: matrix size must be a power of 2 and a multiple of 16\n"); REAL *A = alloc_matrix(arg_size); REAL *B = alloc_matrix(arg_size); REAL *C = alloc_matrix(arg_size); REAL *D = alloc_matrix(arg_size); std::stringstream ss; ss << "Strassen Algorithm (" << arg_size << " x " << arg_size << " matrix with cutoff " << arg_cutoff_value << ") "; init_matrix(arg_size, A, arg_size); init_matrix(arg_size, B, arg_size); OptimizedStrassenMultiply_seq(D, A, B, arg_size, arg_size, arg_size, arg_size, 1); inncabs::run_all( [&](const std::launch l) { OptimizedStrassenMultiply_par(l, C, A, B, arg_size, arg_size, arg_size, arg_size, 1); return 1; }, [&](int result) { return compare_matrix(arg_size, C, arg_size, D, arg_size); }, ss.str() ); }
/* This function checks if a matrix is orthogonal * returns -1 if it can't be checked * returns 0 if it isn't orthogonal * returns 1 if it is orthogonal */ int is_orthogonal(struct matrix m) { if(m.rows!=m.columns) { printf("This matrix isn't square\n"); return -1; } struct matrix c=traspose(m); c=matrix_multiplication(m,c); return compare_matrix(c,identity_matrix(c.rows)); }
// Multiplies two freshly initialised matrices of the given size with mm()
// and with mm_block(), and prints "true" when compare_matrix() returns 1
// for the two products.
void work(int size)
{
    matrix a;
    matrix b;
    matrix result1;   // product written by mm()
    matrix result2;   // only used by the disabled mm_fast() check below
    matrix result3;   // product written by mm_block()

    // Storage for the operands and both active results.
    allocate_matrix(&a, size);
    allocate_matrix(&b, size);
    allocate_matrix(&result1, size);
    // allocate_matrix(&result2, size);
    allocate_matrix(&result3, size);

    // Operand contents.
    init_matrix(a);
    init_matrix(b);

    // Plain multiplication into a zeroed result.
    init_matrix_zero(result1);
    mm(a, b, result1);

    // Disabled: same check for mm_fast().
    // init_matrix_zero(result2);
    // mm_fast(a, b, result2);
    // if (compare_matrix(result1, result2) == 1)
    //     printf("true");
    // free_matrix(result2);

    // Blocked multiplication into a second zeroed result.
    init_matrix_zero(result3);
    mm_block(a, b, result3);

    if (compare_matrix(result1, result3) == 1) {
        printf("true");
    }

    // print_matrix(result1);

    free_matrix(a);
    free_matrix(b);
    free_matrix(result1);
    free_matrix(result3);
}
/**
 * Checks c_calc_matrix_turn() on a 3x3 matrix.
 *
 * The input is filled row by row with 1..9. The expected matrix is built
 * from the closed form k + 2*size - 4*j - 2*i (k being the same running
 * counter), which for size 3 gives
 *     7 4 1
 *     8 5 2
 *     9 6 3
 * The test passes when compare_matrix() returns 0 for the turned input and
 * the expected matrix.
 */
void CTestsTest::c_test_matrix_turn()
{
    const int size_of_matrix = 3;

    // Input matrix: 1, 2, 3, ... in row-major order.
    int **matrix = static_cast<int**>(malloc(size_of_matrix * sizeof(int*)));
    for (int i = 0; i < size_of_matrix; i++) {
        matrix[i] = static_cast<int*>(malloc(size_of_matrix * sizeof(int)));
    }
    int k = 0;
    for (int i = 0; i < size_of_matrix; i++) {
        for (int j = 0; j < size_of_matrix; j++) {
            matrix[i][j] = ++k;
        }
    }

    // Expected result of the turn.
    int **matrix_r = static_cast<int**>(malloc(size_of_matrix * sizeof(int*)));
    for (int i = 0; i < size_of_matrix; i++) {
        matrix_r[i] = static_cast<int*>(malloc(size_of_matrix * sizeof(int)));
    }
    k = 0;
    for (int i = 0; i < size_of_matrix; i++) {
        for (int j = 0; j < size_of_matrix; j++) {
            ++k;
            matrix_r[i][j] = k + size_of_matrix * 2 - 4 * j - 2 * i;
        }
    }

    c_calc_matrix_turn(size_of_matrix, matrix);

    // QCOMPARE returns from the test function when the comparison fails, so
    // capture the result and release the matrices before asserting;
    // otherwise a failing run leaks both of them.
    const int compare_result = compare_matrix(matrix, matrix_r, size_of_matrix);

    for (int i = 0; i < size_of_matrix; i++) {
        free(matrix[i]);
        free(matrix_r[i]);
    }
    free(matrix);
    free(matrix_r);

    QCOMPARE(compare_result, 0);
}
int main(int argc, char *argv[]) { uint64_t* newimg = _mm_malloc(WIMAGE*HIMAGE*sizeof(uint64_t), 64); uint64_t* newimg_ref = _mm_malloc(WIMAGE*HIMAGE*sizeof(uint64_t), 64); uint16_t* filter = _mm_malloc(WFILTER*HFILTER*sizeof(uint16_t), 64); uint16_t* image = _mm_malloc((WIMAGE*HIMAGE+2*PAD_ZERO)*sizeof(uint16_t), 64); for (int i = 0; i < PAD_ZERO; i++) { /* PAD matrix2 with zero to ease programming the optimization functions. */ image[i] = 0; } image += PAD_ZERO; int read_flag = read_matrix(TEST_FILENAME, newimg_ref, filter, image); if (read_flag == 1) printf("Cannot open test file\n"); else if (read_flag == 2) printf("Error while reading data from test file"); else if (read_flag == 3) printf("Error while closing the test file"); if (read_flag) return 0; uint64_t start = timestamp_us(); matconv_optimize(newimg, filter, image); /* run the optimization functions. */ uint64_t time = timestamp_us() - start; if (compare_matrix(newimg, newimg_ref)) { printf("%lu incorrect\n", time); } else { printf("%lu\n", time); } _mm_free(filter); _mm_free(image-PAD_ZERO); _mm_free(newimg); _mm_free(newimg_ref); return 0; }
int main(int argc, char *argv[]) { int workers = 1; int dqsize = 100000; int verify = 0; char c; while ((c=getopt(argc, argv, "w:q:h:c")) != -1) { switch (c) { case 'w': workers = atoi(optarg); break; case 'q': dqsize = atoi(optarg); break; case 'c': verify = 1; break; case 'h': usage(argv[0]); break; default: abort(); } } if (optind == argc) { usage(argv[0]); exit(1); } int n = atoi(argv[optind]); lace_init(workers, dqsize); lace_startup(0, 0, 0); REAL *A, *B, *C1, *C2; if ((n & (n - 1)) != 0 || (n % 16) != 0) { printf("%d: matrix size must be a power of 2" " and a multiple of %d\n", n, 16); return 1; } A = alloc_matrix(n); B = alloc_matrix(n); C1 = alloc_matrix(n); C2 = alloc_matrix(n); init_matrix(n, A, n); init_matrix(n, B, n); LACE_ME; double t1=wctime(); CALL(OptimizedStrassenMultiply, C2, A, B, n, n, n, n); double t2=wctime(); if (verify) { matrixmul(n, A, n, B, n, C1, n); verify = compare_matrix(n, C1, n, C2, n); } if (verify) printf("WRONG RESULT!\n"); else { printf("Time: %f\n", t2-t1); } lace_exit(); return 0; }
int main(int argc, char *argv[]) { int n=get_num_ind(); int i,j; struct timeval tv1,tv2; adouble *xad; adouble fad; double f; double *x; x=new double[n]; xad=new adouble[n]; get_initial_value(x); printf("evaluating the function..."); trace_on(tag); for(i=0;i<n;i++) { xad[i] <<= x[i]; } fad=func_eval(xad); fad >>= f; trace_off(); printf("done!\n"); // printf("function value =<%10.20f>\n",f); // function(tag,1,n,x,&f); // printf("adolc func value=<%10.20f>\n",f); //tape_doc(tag,1,n,x,&f); #ifdef _compare_with_full double **H; H = myalloc2(n,n); printf("computing full hessain...."); gettimeofday(&tv1,NULL); hessian(tag,n,x,H); printf("done\n"); gettimeofday(&tv2,NULL); printf("Computing the full hessian cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #ifdef _PRINTOUT for(i=0;i<n;i++){ for(j=0;j<n;j++){ printf("H[%d][%d]=<%10.10f>",i,j,H[i][j]); } printf("\n"); } printf("\n"); #endif #endif #ifdef edge_pushing unsigned int *rind = NULL; unsigned int *cind = NULL; double *values = NULL; int nnz; int options[2]; options[0]=PRE_ACC; options[1]=COMPUT_GRAPH; gettimeofday(&tv1,NULL); // edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options); sparse_hess(tag,n,0,x, &nnz, &rind, &cind, &values, options); gettimeofday(&tv2,NULL); printf("Sparse Hessian: edge pushing cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #ifdef _PRINTOUT for(i=0;i<nnz;i++){ printf("<%d,%d>:<%10.10f>\n",cind[i],rind[i],values[i]); // printf("%d %d \n", rind[i], cind[i]); } #endif #endif #ifdef _compare_with_full #ifdef edge_pushing compare_matrix(n,H,nnz,cind,rind,values); #endif myfree2(H); #endif #ifdef edge_pushing printf("nnz=%d\n", nnz); free(rind); rind=NULL; free(cind); cind=NULL; free(values); values=NULL; #endif delete[] x; delete[] xad; return 0; }
int main(int argc, char *argv[]) { int n=NUM_IND; int i,j; struct timeval tv1,tv2; adouble *xad; adouble fad; double f; double *x; x=new double[n]; xad=new adouble[n]; get_initials(x, n); // printf("evaluating the function..."); trace_on(tag); for(i=0;i<n;i++) { xad[i] <<= x[i]; } fad=eval_func<adouble>(xad, n); fad >>= f; trace_off(); // printf("done!\n"); std::cout << "y = " << f << std::endl; #ifdef COMPARE_WITH_FULL_HESS double **H; H = myalloc2(n,n); printf("computing full hessain...."); gettimeofday(&tv1,NULL); hessian(tag,n,x,H); printf("done\n"); gettimeofday(&tv2,NULL); printf("Computing the full hessian cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #ifdef PRINT_RESULTS for(i=0;i<n;i++){ for(j=0;j<n;j++){ printf("H[%d][%d]=<%10.10f>",i,j,H[i][j]); } printf("\n"); } printf("\n"); #endif #endif unsigned int *rind = NULL; unsigned int *cind = NULL; double *values = NULL; int nnz; int options[2]; #ifdef LIVARH options[0]=0; options[1]=1; gettimeofday(&tv1,NULL); edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options); gettimeofday(&tv2,NULL); printf("Sparse Hessian: LivarH cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #endif #ifdef LIVARHACC options[0]=1; options[1]=1; gettimeofday(&tv1,NULL); edge_hess(tag, 1, n, x, &nnz, &rind, &cind, &values, options); gettimeofday(&tv2,NULL); printf("Sparse Hessian: LivarHACC cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #endif // Sparse ADOL-C drivers report the upper matrix #ifdef DIRECT options[0]=0; options[1]=1; gettimeofday(&tv1,NULL); sparse_hess(tag, n, 0, x, &nnz, &cind, &rind, &values, options); gettimeofday(&tv2,NULL); printf("Sparse Hessian: direct recovery cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #endif #ifdef INDIRECT options[0]=0; options[1]=0; gettimeofday(&tv1,NULL); sparse_hess(tag, n, 0, x, &nnz, &cind, &rind, 
&values, options); gettimeofday(&tv2,NULL); printf("Sparse Hessian: indirect recovery cost %10.6f seconds\n",(tv2.tv_sec-tv1.tv_sec)+(double)(tv2.tv_usec-tv1.tv_usec)/1000000); #endif #ifdef PRINT_RESULTS for(i=0;i<nnz;i++){ printf("<%d,%d>:<%10.10f>\n",rind[i],cind[i],values[i]); } #endif #ifdef COMPARE_WITH_FULL_HESS compare_matrix(n,H,nnz,rind,cind,values); myfree2(H); #endif free(rind); rind=NULL; free(cind); cind=NULL; free(values); values=NULL; delete[] x; delete[] xad; return 0; }