//-------------------------------------------------------- // global element = global element //-------------------------------------------------------- void gmove_ge_ge(){ init_a(); init_b(); #pragma xmp barrier #pragma xmp task on p1 { #ifdef _MPI3 #pragma xmp gmove in a[7][8][9] = b[3][4][5]; #endif #pragma xmp barrier #pragma xmp task on t1(7,8,9) nocomm { if (a[7][8][9] != 3*10000 + 4*100 + 5){ printf("ERROR in gmove_ge_ge\n"); exit(1); } } } }
/*
 * Jacobi iteration driver.
 *
 * Usage: jacobi <n>
 *
 * Builds the matrix bands via gen_laplace_mat(), sets up x, true_x and b,
 * then alternates calc_newx() / calc_resid() until the residual drops to
 * 0.001 or below, or 100000 iterations have been executed.  The elapsed
 * wall-clock time (including setup) and the final residual are printed.
 *
 * Returns 0 on success, 1 on a usage error or allocation failure.
 */
int main(int argc, char *argv[])
{
    int n, i, diag_size, mid_size, short_size, execs = 0;
    int *diag, *l_upper, *l_lower, *u_upper, *u_lower;
    double *x, *newx, *b, *true_x;
    double residual = 10000;
    struct timeb start, stop;
    int time_diff;

    if (argc != 2) {
        printf("Format: jacobi <n>\n");
        return 1;
    }

    n = atoi(argv[1]);
    /* atoi() yields 0 for non-numeric input; a non-positive size is unusable. */
    if (n <= 0) {
        printf("Format: jacobi <n>\n");
        return 1;
    }

    //start timing here
    ftime(&start);

    gen_laplace_mat(n, &diag, &l_upper, &l_lower, &u_upper, &u_lower);

    /* Lengths of the main, first off- and n-th off-diagonals. */
    diag_size = n * n;
    mid_size = n * n - 1;
    short_size = n * n - n;

    x = malloc(diag_size * sizeof(double));
    newx = malloc(diag_size * sizeof(double));
    true_x = malloc(diag_size * sizeof(double));
    b = malloc(diag_size * sizeof(double));
    if (x == NULL || newx == NULL || true_x == NULL || b == NULL) {
        printf("Out of memory\n");
        free(x);
        free(newx);
        free(true_x);
        free(b);
        return 1;
    }

    init_x(diag_size, x);
    init_true_x(diag_size, true_x);
    init_b(n, diag_size, mid_size, short_size,
           diag, l_upper, l_lower, u_upper, u_lower, true_x, b);

    while (residual > 0.001 && execs < 100000) {
        calc_newx(n, diag_size, mid_size, short_size,
                  diag, l_upper, l_lower, u_upper, u_lower, x, b, newx);

        /* Promote the new iterate to the current one. */
        for (i = 0; i < diag_size; i++) {
            x[i] = newx[i];
        }

        residual = calc_resid(n, diag_size, mid_size, short_size,
                              diag, l_upper, l_lower, u_upper, u_lower, x, b);
        execs++;
    }

    // end timing here
    ftime(&stop);
    time_diff = (int)(1000.0 * (stop.time - start.time)
                      + (stop.millitm - start.millitm));

    /* time_diff is a signed int, so print it with %d. */
    printf("Finished in %d milliseconds.\n", time_diff);
    printf("In %d iterations, got residual of %f\n", execs, residual);

    /* NOTE(review): diag/l_upper/l_lower/u_upper/u_lower come from
     * gen_laplace_mat(); their ownership is not visible here, so they are
     * left untouched -- confirm whether they must be freed as well. */
    free(x);
    free(newx);
    free(true_x);
    free(b);

    return 0;
}
/** * main application * * @param argc number of cli arguments * @param argv values of cli arguments */ int main(int argc, char* argv[]) { if (argc != 6) { std::cout << "cg_max_iterations" << std::endl; std::cout << "cg_eps" << std::endl; std::cout << "mpiGridX" << std::endl; std::cout << "mpiGridY" << std::endl; std::cout << "gridwidth" << std::endl; std::cout << std::endl; std::cout << "example:" << std::endl; std::cout << "./app 10 1e-5 2 5 128" << std::endl; return -1; } // input parameters size_t cg_max_iterations = atoi(argv[1]); double cg_eps = atof(argv[2]); const int mpiGridX = atoi(argv[3]); const int mpiGridY = atoi(argv[4]); grid_points_1d = adaptMeshSize(mpiGridX, mpiGridY, atoi(argv[5])); std::printf("max_iter: %d, eps: %f, grid: (%d, %d), n: %d \n", static_cast<int>(cg_max_iterations), cg_eps, mpiGridX, mpiGridY, static_cast<int>(grid_points_1d)); double* gridS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); double* bS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); // TEST single // initialize the gird and rights hand side init_grid(gridS); init_b(bS); // solve Poisson equation using CG method Timer tS; tS.start(); single::solve(gridS, bS, cg_max_iterations, cg_eps); double timeS = tS.stop(); std::cout << std::endl << "Needed time single: " << timeS << " s" << std::endl << std::endl; _mm_free(gridS); _mm_free(bS); return 0; }
//-------------------------------------------------------- // local section = global element //-------------------------------------------------------- void gmove_ls_ge(){ int result = 0; init_x0(); init_b(); #pragma xmp barrier #pragma xmp task on p1 { #ifdef _MPI3 #pragma xmp gmove in x[0:N/4][N/2:N/2][4:N-5] = b[3][4][5]; #endif #pragma xmp barrier for (int i = 0; i < N/4; i++){ for (int j = N/2; j < N; j++){ for (int k = 4; k < N-1; k++){ if (x[i][j][k] != 3*10000 + 4*100 + 5){ //printf("(%d, %d, %d) %d\n", i, j, k, x[i][j][k]); result = 1; } } } } #pragma xmp reduction (+:result) #pragma xmp task on p1(1,1) nocomm { if (result != 0){ printf("ERROR in gmove_ls_ge\n"); exit(1); } } } }
//-------------------------------------------------------- // local section = global section //-------------------------------------------------------- void gmove_ls_gs(){ int result = 0; init_x0(); init_b(); #pragma xmp barrier #pragma xmp task on p1 { #ifdef _MPI3 #pragma xmp gmove in x[0:N/4][N/2:N/2][4:N-5] = b[N/2:N/4:2][0:N/2][0:N-5]; #endif #pragma xmp barrier for (int i = 0; i < N/4; i++){ for (int j = N/2; j < N; j++){ for (int k = 4; k < N-1; k++){ if (x[i][j][k] != (N/2+i*2)*10000 + (j-N/2)*100 + (k-4)){ result = 1; } } } } #pragma xmp reduction (+:result) #pragma xmp task on p1(1,1) nocomm { if (result != 0){ printf("ERROR in gmove_ls_gs\n"); exit(1); } } } }
//-------------------------------------------------------- // global section = global element //-------------------------------------------------------- void gmove_gs_ge(){ int result = 0; init_a(); init_b(); #pragma xmp barrier #pragma xmp task on p1 { #ifdef _MPI3 #pragma xmp gmove in a[0:N/4][N/2:N/2][4:N-5] = b[3][4][5]; #endif #pragma xmp barrier #pragma xmp loop (i,j,k) on t1(i,j,k) reduction(+:result) for (int i = 0; i < N/4; i++){ for (int j = N/2; j < N; j++){ for (int k = 4; k < N-1; k++){ if (a[i][j][k] != 3*10000 + 4*100 + 5){ result = 1; } } } } #pragma xmp task on p1(1,1) nocomm { if (result != 0){ printf("ERROR in gmove_gs_ge\n"); exit(1); } } } }
//-------------------------------------------------------- // scalar = global element //-------------------------------------------------------- void gmove_s_ge(){ int result = 0; s = 0; init_b(); #pragma xmp barrier #pragma xmp task on p1 { #ifdef _MPI3 #pragma xmp gmove in s = b[3][4][5]; #endif #pragma xmp barrier #pragma xmp barrier if (s != 3*10000 + 4*100 + 5){ result = 1; } #pragma xmp reduction (+:result) #pragma xmp task on p1(1,1) nocomm { if (result != 0){ printf("ERROR in gmove_s_ge\n"); exit(1); } } } }
int main(int argc, char **argv) { t_all *all; if (argc > 1) { if (!(all = init_check_option(argv))) return (error_return()); if ((all->a = check_stock(argv)) == NULL) return (error_return()); if (LENA <= 0) return (error_return()); if ((all->b = init_b(all->a->len + 1)) == NULL) return (error_return()); put_state(all, 1); sort(all); put_state(all, 2); if (all->opo == 1) ft_printf("Operation : %d\n", all->op); } else return (error_return()); return (0); }
int main(void) { /* Local scalars */ char uplo, uplo_i; char trans, trans_i; char diag, diag_i; lapack_int n, n_i; lapack_int nrhs, nrhs_i; lapack_int ldb, ldb_i; lapack_int ldb_r; lapack_int ldx, ldx_i; lapack_int ldx_r; lapack_int info, info_i; lapack_int i; int failed; /* Local arrays */ float *ap = NULL, *ap_i = NULL; float *b = NULL, *b_i = NULL; float *x = NULL, *x_i = NULL; float *ferr = NULL, *ferr_i = NULL; float *berr = NULL, *berr_i = NULL; float *work = NULL, *work_i = NULL; lapack_int *iwork = NULL, *iwork_i = NULL; float *ferr_save = NULL; float *berr_save = NULL; float *ap_r = NULL; float *b_r = NULL; float *x_r = NULL; /* Iniitialize the scalar parameters */ init_scalars_stprfs( &uplo, &trans, &diag, &n, &nrhs, &ldb, &ldx ); ldb_r = nrhs+2; ldx_r = nrhs+2; uplo_i = uplo; trans_i = trans; diag_i = diag; n_i = n; nrhs_i = nrhs; ldb_i = ldb; ldx_i = ldx; /* Allocate memory for the LAPACK routine arrays */ ap = (float *)LAPACKE_malloc( ((n*(n+1)/2)) * sizeof(float) ); b = (float *)LAPACKE_malloc( ldb*nrhs * sizeof(float) ); x = (float *)LAPACKE_malloc( ldx*nrhs * sizeof(float) ); ferr = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); berr = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); work = (float *)LAPACKE_malloc( 3*n * sizeof(float) ); iwork = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); /* Allocate memory for the C interface function arrays */ ap_i = (float *)LAPACKE_malloc( ((n*(n+1)/2)) * sizeof(float) ); b_i = (float *)LAPACKE_malloc( ldb*nrhs * sizeof(float) ); x_i = (float *)LAPACKE_malloc( ldx*nrhs * sizeof(float) ); ferr_i = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); berr_i = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); work_i = (float *)LAPACKE_malloc( 3*n * sizeof(float) ); iwork_i = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); /* Allocate memory for the backup arrays */ ferr_save = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); berr_save = (float *)LAPACKE_malloc( nrhs * sizeof(float) ); /* Allocate 
memory for the row-major arrays */ ap_r = (float *)LAPACKE_malloc( n*(n+1)/2 * sizeof(float) ); b_r = (float *)LAPACKE_malloc( n*(nrhs+2) * sizeof(float) ); x_r = (float *)LAPACKE_malloc( n*(nrhs+2) * sizeof(float) ); /* Initialize input arrays */ init_ap( (n*(n+1)/2), ap ); init_b( ldb*nrhs, b ); init_x( ldx*nrhs, x ); init_ferr( nrhs, ferr ); init_berr( nrhs, berr ); init_work( 3*n, work ); init_iwork( n, iwork ); /* Backup the ouptut arrays */ for( i = 0; i < nrhs; i++ ) { ferr_save[i] = ferr[i]; } for( i = 0; i < nrhs; i++ ) { berr_save[i] = berr[i]; } /* Call the LAPACK routine */ stprfs_( &uplo, &trans, &diag, &n, &nrhs, ap, b, &ldb, x, &ldx, ferr, berr, work, iwork, &info ); /* Initialize input data, call the column-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 3*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { iwork_i[i] = iwork[i]; } info_i = LAPACKE_stprfs_work( LAPACK_COL_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, ap_i, b_i, ldb_i, x_i, ldx_i, ferr_i, berr_i, work_i, iwork_i ); failed = compare_stprfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major middle-level interface to stprfs\n" ); } else { printf( "FAILED: column-major middle-level interface to stprfs\n" ); } /* Initialize input data, call the column-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 3*n; i++ ) { 
work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { iwork_i[i] = iwork[i]; } info_i = LAPACKE_stprfs( LAPACK_COL_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, ap_i, b_i, ldb_i, x_i, ldx_i, ferr_i, berr_i ); failed = compare_stprfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major high-level interface to stprfs\n" ); } else { printf( "FAILED: column-major high-level interface to stprfs\n" ); } /* Initialize input data, call the row-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 3*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { iwork_i[i] = iwork[i]; } LAPACKE_spp_trans( LAPACK_COL_MAJOR, uplo, n, ap_i, ap_r ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, x_i, ldx, x_r, nrhs+2 ); info_i = LAPACKE_stprfs_work( LAPACK_ROW_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, ap_r, b_r, ldb_r, x_r, ldx_r, ferr_i, berr_i, work_i, iwork_i ); failed = compare_stprfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major middle-level interface to stprfs\n" ); } else { printf( "FAILED: row-major middle-level interface to stprfs\n" ); } /* Initialize input data, call the row-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 3*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { 
iwork_i[i] = iwork[i]; } /* Init row_major arrays */ LAPACKE_spp_trans( LAPACK_COL_MAJOR, uplo, n, ap_i, ap_r ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, x_i, ldx, x_r, nrhs+2 ); info_i = LAPACKE_stprfs( LAPACK_ROW_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, ap_r, b_r, ldb_r, x_r, ldx_r, ferr_i, berr_i ); failed = compare_stprfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major high-level interface to stprfs\n" ); } else { printf( "FAILED: row-major high-level interface to stprfs\n" ); } /* Release memory */ if( ap != NULL ) { LAPACKE_free( ap ); } if( ap_i != NULL ) { LAPACKE_free( ap_i ); } if( ap_r != NULL ) { LAPACKE_free( ap_r ); } if( b != NULL ) { LAPACKE_free( b ); } if( b_i != NULL ) { LAPACKE_free( b_i ); } if( b_r != NULL ) { LAPACKE_free( b_r ); } if( x != NULL ) { LAPACKE_free( x ); } if( x_i != NULL ) { LAPACKE_free( x_i ); } if( x_r != NULL ) { LAPACKE_free( x_r ); } if( ferr != NULL ) { LAPACKE_free( ferr ); } if( ferr_i != NULL ) { LAPACKE_free( ferr_i ); } if( ferr_save != NULL ) { LAPACKE_free( ferr_save ); } if( berr != NULL ) { LAPACKE_free( berr ); } if( berr_i != NULL ) { LAPACKE_free( berr_i ); } if( berr_save != NULL ) { LAPACKE_free( berr_save ); } if( work != NULL ) { LAPACKE_free( work ); } if( work_i != NULL ) { LAPACKE_free( work_i ); } if( iwork != NULL ) { LAPACKE_free( iwork ); } if( iwork_i != NULL ) { LAPACKE_free( iwork_i ); } return 0; }
int main(void) { /* Local scalars */ char uplo, uplo_i; lapack_int n, n_i; lapack_int nrhs, nrhs_i; lapack_int ldb, ldb_i; lapack_int ldb_r; lapack_int info, info_i; lapack_int i; int failed; /* Local arrays */ float *ap = NULL, *ap_i = NULL; lapack_int *ipiv = NULL, *ipiv_i = NULL; float *b = NULL, *b_i = NULL; float *b_save = NULL; float *ap_r = NULL; float *b_r = NULL; /* Iniitialize the scalar parameters */ init_scalars_ssptrs( &uplo, &n, &nrhs, &ldb ); ldb_r = nrhs+2; uplo_i = uplo; n_i = n; nrhs_i = nrhs; ldb_i = ldb; /* Allocate memory for the LAPACK routine arrays */ ap = (float *)LAPACKE_malloc( ((n*(n+1)/2)) * sizeof(float) ); ipiv = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); b = (float *)LAPACKE_malloc( ldb*nrhs * sizeof(float) ); /* Allocate memory for the C interface function arrays */ ap_i = (float *)LAPACKE_malloc( ((n*(n+1)/2)) * sizeof(float) ); ipiv_i = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); b_i = (float *)LAPACKE_malloc( ldb*nrhs * sizeof(float) ); /* Allocate memory for the backup arrays */ b_save = (float *)LAPACKE_malloc( ldb*nrhs * sizeof(float) ); /* Allocate memory for the row-major arrays */ ap_r = (float *)LAPACKE_malloc( n*(n+1)/2 * sizeof(float) ); b_r = (float *)LAPACKE_malloc( n*(nrhs+2) * sizeof(float) ); /* Initialize input arrays */ init_ap( (n*(n+1)/2), ap ); init_ipiv( n, ipiv ); init_b( ldb*nrhs, b ); /* Backup the ouptut arrays */ for( i = 0; i < ldb*nrhs; i++ ) { b_save[i] = b[i]; } /* Call the LAPACK routine */ ssptrs_( &uplo, &n, &nrhs, ap, ipiv, b, &ldb, &info ); /* Initialize input data, call the column-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } info_i = LAPACKE_ssptrs_work( LAPACK_COL_MAJOR, uplo_i, n_i, nrhs_i, ap_i, ipiv_i, b_i, ldb_i ); failed = compare_ssptrs( b, b_i, info, info_i, ldb, nrhs ); if( 
failed == 0 ) { printf( "PASSED: column-major middle-level interface to ssptrs\n" ); } else { printf( "FAILED: column-major middle-level interface to ssptrs\n" ); } /* Initialize input data, call the column-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } info_i = LAPACKE_ssptrs( LAPACK_COL_MAJOR, uplo_i, n_i, nrhs_i, ap_i, ipiv_i, b_i, ldb_i ); failed = compare_ssptrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major high-level interface to ssptrs\n" ); } else { printf( "FAILED: column-major high-level interface to ssptrs\n" ); } /* Initialize input data, call the row-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } LAPACKE_spp_trans( LAPACK_COL_MAJOR, uplo, n, ap_i, ap_r ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); info_i = LAPACKE_ssptrs_work( LAPACK_ROW_MAJOR, uplo_i, n_i, nrhs_i, ap_r, ipiv_i, b_r, ldb_r ); LAPACKE_sge_trans( LAPACK_ROW_MAJOR, n, nrhs, b_r, nrhs+2, b_i, ldb ); failed = compare_ssptrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major middle-level interface to ssptrs\n" ); } else { printf( "FAILED: row-major middle-level interface to ssptrs\n" ); } /* Initialize input data, call the row-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < (n*(n+1)/2); i++ ) { ap_i[i] = ap[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } /* Init row_major arrays */ LAPACKE_spp_trans( LAPACK_COL_MAJOR, uplo, n, ap_i, ap_r ); LAPACKE_sge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); info_i = 
LAPACKE_ssptrs( LAPACK_ROW_MAJOR, uplo_i, n_i, nrhs_i, ap_r, ipiv_i, b_r, ldb_r ); LAPACKE_sge_trans( LAPACK_ROW_MAJOR, n, nrhs, b_r, nrhs+2, b_i, ldb ); failed = compare_ssptrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major high-level interface to ssptrs\n" ); } else { printf( "FAILED: row-major high-level interface to ssptrs\n" ); } /* Release memory */ if( ap != NULL ) { LAPACKE_free( ap ); } if( ap_i != NULL ) { LAPACKE_free( ap_i ); } if( ap_r != NULL ) { LAPACKE_free( ap_r ); } if( ipiv != NULL ) { LAPACKE_free( ipiv ); } if( ipiv_i != NULL ) { LAPACKE_free( ipiv_i ); } if( b != NULL ) { LAPACKE_free( b ); } if( b_i != NULL ) { LAPACKE_free( b_i ); } if( b_r != NULL ) { LAPACKE_free( b_r ); } if( b_save != NULL ) { LAPACKE_free( b_save ); } return 0; }
/** * main application * * @param argc number of cli arguments * @param argv values of cli arguments */ int main(int argc, char* argv[]) { if (argc != 6) { std::cout << "cg_max_iterations" << std::endl; std::cout << "cg_eps" << std::endl; std::cout << "mpiGridX" << std::endl; std::cout << "mpiGridY" << std::endl; std::cout << "gridwidth" << std::endl; std::cout << std::endl; std::cout << "example:" << std::endl; std::cout << "./app 10 1e-5 2 5 128" << std::endl; return -1; } MPI_Init(&argc, &argv); int rank = 0; MPI_Comm_rank(MPI_COMM_WORLD, &rank); // input parameters size_t cg_max_iterations = atoi(argv[1]); double cg_eps = atof(argv[2]); const int mpiGridX = atoi(argv[3]); const int mpiGridY = atoi(argv[4]); grid_points_1d = adaptMeshSize(mpiGridX, mpiGridY, atoi(argv[5])); if(rank == 0) std::printf("max_iter: %d, eps: %f, grid: (%d, %d), n: %d \n", static_cast<int>(cg_max_iterations), cg_eps, mpiGridX, mpiGridY, static_cast<int>(grid_points_1d)); // check consistency with number of processes int numProcesses; MPI_Comm_size(MPI_COMM_WORLD, &numProcesses); assert(mpiGridX*mpiGridY == numProcesses && "Your grid size must correspond to the number of processes!"); double* gridS = NULL; double* bS = NULL; // single core function is only run by master if(rank == 0) { gridS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); bS = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); // TEST single // initialize the gird and rights hand side init_grid(gridS); init_b(bS); // solve Poisson equation using CG method Timer tS; tS.start(); single::solve(gridS, bS, cg_max_iterations, cg_eps); double timeS = tS.stop(); std::cout << std::endl << "Needed time single: " << timeS << " s" << std::endl << std::endl; } MPI_Barrier(MPI_COMM_WORLD); // TEST multi // initialize the gird and rights hand side double* gridM = NULL; double* bM = NULL; if(rank == 0) { gridM = (double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); bM = 
(double*)_mm_malloc(grid_points_1d*grid_points_1d*sizeof(double), 64); init_grid(gridM); init_b(bM); } // solve Poisson equation using CG method Timer tM; tM.start(); multi::solve(mpiGridX, mpiGridY, gridM, bM, cg_max_iterations, cg_eps); double timeM = tM.stop(); if(rank == 0){ std::cout << std::endl << "Needed time multi: " << timeM << " s" << std::endl << std::endl; assertMatricesEqual(gridS, gridM, grid_points_1d); std::cout << "Assert ok!" << std::endl; _mm_free(gridS); _mm_free(bS); _mm_free(gridM); _mm_free(bM); } MPI_Finalize(); return 0; }
int main(void) { /* Local scalars */ char uplo, uplo_i; char trans, trans_i; char diag, diag_i; lapack_int n, n_i; lapack_int nrhs, nrhs_i; lapack_int lda, lda_i; lapack_int lda_r; lapack_int ldb, ldb_i; lapack_int ldb_r; lapack_int ldx, ldx_i; lapack_int ldx_r; lapack_int info, info_i; lapack_int i; int failed; /* Local arrays */ lapack_complex_double *a = NULL, *a_i = NULL; lapack_complex_double *b = NULL, *b_i = NULL; lapack_complex_double *x = NULL, *x_i = NULL; double *ferr = NULL, *ferr_i = NULL; double *berr = NULL, *berr_i = NULL; lapack_complex_double *work = NULL, *work_i = NULL; double *rwork = NULL, *rwork_i = NULL; double *ferr_save = NULL; double *berr_save = NULL; lapack_complex_double *a_r = NULL; lapack_complex_double *b_r = NULL; lapack_complex_double *x_r = NULL; /* Iniitialize the scalar parameters */ init_scalars_ztrrfs( &uplo, &trans, &diag, &n, &nrhs, &lda, &ldb, &ldx ); lda_r = n+2; ldb_r = nrhs+2; ldx_r = nrhs+2; uplo_i = uplo; trans_i = trans; diag_i = diag; n_i = n; nrhs_i = nrhs; lda_i = lda; ldb_i = ldb; ldx_i = ldx; /* Allocate memory for the LAPACK routine arrays */ a = (lapack_complex_double *) LAPACKE_malloc( lda*n * sizeof(lapack_complex_double) ); b = (lapack_complex_double *) LAPACKE_malloc( ldb*nrhs * sizeof(lapack_complex_double) ); x = (lapack_complex_double *) LAPACKE_malloc( ldx*nrhs * sizeof(lapack_complex_double) ); ferr = (double *)LAPACKE_malloc( nrhs * sizeof(double) ); berr = (double *)LAPACKE_malloc( nrhs * sizeof(double) ); work = (lapack_complex_double *) LAPACKE_malloc( 2*n * sizeof(lapack_complex_double) ); rwork = (double *)LAPACKE_malloc( n * sizeof(double) ); /* Allocate memory for the C interface function arrays */ a_i = (lapack_complex_double *) LAPACKE_malloc( lda*n * sizeof(lapack_complex_double) ); b_i = (lapack_complex_double *) LAPACKE_malloc( ldb*nrhs * sizeof(lapack_complex_double) ); x_i = (lapack_complex_double *) LAPACKE_malloc( ldx*nrhs * sizeof(lapack_complex_double) ); ferr_i = (double 
*)LAPACKE_malloc( nrhs * sizeof(double) ); berr_i = (double *)LAPACKE_malloc( nrhs * sizeof(double) ); work_i = (lapack_complex_double *) LAPACKE_malloc( 2*n * sizeof(lapack_complex_double) ); rwork_i = (double *)LAPACKE_malloc( n * sizeof(double) ); /* Allocate memory for the backup arrays */ ferr_save = (double *)LAPACKE_malloc( nrhs * sizeof(double) ); berr_save = (double *)LAPACKE_malloc( nrhs * sizeof(double) ); /* Allocate memory for the row-major arrays */ a_r = (lapack_complex_double *) LAPACKE_malloc( n*(n+2) * sizeof(lapack_complex_double) ); b_r = (lapack_complex_double *) LAPACKE_malloc( n*(nrhs+2) * sizeof(lapack_complex_double) ); x_r = (lapack_complex_double *) LAPACKE_malloc( n*(nrhs+2) * sizeof(lapack_complex_double) ); /* Initialize input arrays */ init_a( lda*n, a ); init_b( ldb*nrhs, b ); init_x( ldx*nrhs, x ); init_ferr( nrhs, ferr ); init_berr( nrhs, berr ); init_work( 2*n, work ); init_rwork( n, rwork ); /* Backup the ouptut arrays */ for( i = 0; i < nrhs; i++ ) { ferr_save[i] = ferr[i]; } for( i = 0; i < nrhs; i++ ) { berr_save[i] = berr[i]; } /* Call the LAPACK routine */ ztrrfs_( &uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, x, &ldx, ferr, berr, work, rwork, &info ); /* Initialize input data, call the column-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 2*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { rwork_i[i] = rwork[i]; } info_i = LAPACKE_ztrrfs_work( LAPACK_COL_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, a_i, lda_i, b_i, ldb_i, x_i, ldx_i, ferr_i, berr_i, work_i, rwork_i ); failed = compare_ztrrfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major middle-level 
interface to ztrrfs\n" ); } else { printf( "FAILED: column-major middle-level interface to ztrrfs\n" ); } /* Initialize input data, call the column-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 2*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { rwork_i[i] = rwork[i]; } info_i = LAPACKE_ztrrfs( LAPACK_COL_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, a_i, lda_i, b_i, ldb_i, x_i, ldx_i, ferr_i, berr_i ); failed = compare_ztrrfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major high-level interface to ztrrfs\n" ); } else { printf( "FAILED: column-major high-level interface to ztrrfs\n" ); } /* Initialize input data, call the row-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 2*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { rwork_i[i] = rwork[i]; } LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, n, a_i, lda, a_r, n+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, x_i, ldx, x_r, nrhs+2 ); info_i = LAPACKE_ztrrfs_work( LAPACK_ROW_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, a_r, lda_r, b_r, ldb_r, x_r, ldx_r, ferr_i, berr_i, work_i, rwork_i ); failed = compare_ztrrfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major middle-level interface to ztrrfs\n" ); } else { printf( "FAILED: 
row-major middle-level interface to ztrrfs\n" ); } /* Initialize input data, call the row-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b[i]; } for( i = 0; i < ldx*nrhs; i++ ) { x_i[i] = x[i]; } for( i = 0; i < nrhs; i++ ) { ferr_i[i] = ferr_save[i]; } for( i = 0; i < nrhs; i++ ) { berr_i[i] = berr_save[i]; } for( i = 0; i < 2*n; i++ ) { work_i[i] = work[i]; } for( i = 0; i < n; i++ ) { rwork_i[i] = rwork[i]; } /* Init row_major arrays */ LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, n, a_i, lda, a_r, n+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, x_i, ldx, x_r, nrhs+2 ); info_i = LAPACKE_ztrrfs( LAPACK_ROW_MAJOR, uplo_i, trans_i, diag_i, n_i, nrhs_i, a_r, lda_r, b_r, ldb_r, x_r, ldx_r, ferr_i, berr_i ); failed = compare_ztrrfs( ferr, ferr_i, berr, berr_i, info, info_i, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major high-level interface to ztrrfs\n" ); } else { printf( "FAILED: row-major high-level interface to ztrrfs\n" ); } /* Release memory */ if( a != NULL ) { LAPACKE_free( a ); } if( a_i != NULL ) { LAPACKE_free( a_i ); } if( a_r != NULL ) { LAPACKE_free( a_r ); } if( b != NULL ) { LAPACKE_free( b ); } if( b_i != NULL ) { LAPACKE_free( b_i ); } if( b_r != NULL ) { LAPACKE_free( b_r ); } if( x != NULL ) { LAPACKE_free( x ); } if( x_i != NULL ) { LAPACKE_free( x_i ); } if( x_r != NULL ) { LAPACKE_free( x_r ); } if( ferr != NULL ) { LAPACKE_free( ferr ); } if( ferr_i != NULL ) { LAPACKE_free( ferr_i ); } if( ferr_save != NULL ) { LAPACKE_free( ferr_save ); } if( berr != NULL ) { LAPACKE_free( berr ); } if( berr_i != NULL ) { LAPACKE_free( berr_i ); } if( berr_save != NULL ) { LAPACKE_free( berr_save ); } if( work != NULL ) { LAPACKE_free( work ); } if( work_i != NULL ) { LAPACKE_free( work_i ); } if( rwork != NULL ) { LAPACKE_free( rwork ); } if( rwork_i != NULL ) { 
LAPACKE_free( rwork_i ); } return 0; }
void a1(void) { int N = 3, M = 1; data *dat = (data*) malloc(N*sizeof(data)); dat[0] = (data) { .val = 299793.0, .delta = 2.0 }; dat[1] = (data) { .val = 299792.0, .delta = 4.5 }; dat[2] = (data) { .val = 299782.0, .delta = 25.0 }; double avg = average_value(N, dat), sig_i = sigma_square_intern(N, dat), sig_e = sigma_square_extern(M, N, dat); printf("c_avg = %f\n", avg); printf("sigma_i = %f\n", sqrt(sig_i)); printf("sigma_e = %f\n", sqrt(sig_e)); } void a2(void) { int N; measurement *m = read_measurements("dat/a2.txt", &N); double a, b, sig_a, sig_b, chi; printf("\nLinear regression I = b*U:\n"); linear_regression(N, m, NULL, &b, NULL, &sig_b); chi = chi_square_(N, m, 0, b); printf("b = %f +/- %f, chi = %f\n", b, sqrt(sig_b)/2, sqrt(chi)); printf("\nLinear regression I = a + b*U:\n"); linear_regression(N, m, &a, &b, &sig_a, &sig_b); chi = chi_square_(N, m, a, b); printf("a = %f +/- %f, b = %f +/- %f, chi = %f\n", a, sqrt(sig_a)/2, b, sqrt(sig_b)/2, sqrt(chi)); } void a3(void) { // order of polynoms + 1 int N[5] = {3, 5, 9, 13, 17}; // output function (%i = current N) char *output_p_gp = "results/a3/poly-%i.gp", *output_p_txt = "results/a3/poly-%i.txt", *output_l_dat = "results/a3/legendre-%i.dat", *output_l_txt = "results/a3/legendre-%i.txt"; // read data matrix dat = read_data("dat/a3.txt"); int i; for (i = 0; i < sizeof(N)/sizeof(int); i++) { // // use polynoms // matrix F = init_F(dat, N[i], &polynom); vector b = init_b(dat, N[i], &polynom); // solve system of linear equations vector x = solve_gauss(F,b); char fp[100]; // parameter output snprintf(fp, sizeof(fp), output_p_txt, N[i]-1); printf("Opening file %s...\n", fp); FILE *file = fopen(fp, "w+"); vector_fprint(file, x); fclose(file); // gnuplot output snprintf(fp, sizeof(fp), output_p_gp, N[i]-1); printf("Opening file %s...\n", fp); file = fopen(fp, "w+"); fprintf(file, "plot "); int k; for (k = 0; k < x.N; k++) { fprintf(file, "%e * x**%i", VectorGET(x,k), k); if (k < x.N-1) fprintf(file, " + "); } 
fprintf(file, "\n"); fclose(file); // // Use legendre polynoms // F = init_F(dat, N[i], &legendre_polynom); b = init_b(dat, N[i], &legendre_polynom); // solve system of linear equations x = solve_gauss(F,b); // parameter output snprintf(fp, sizeof(fp), output_l_txt, N[i]-1); printf("Opening file %s...\n", fp); file = fopen(fp, "w+"); vector_fprint(file, x); // "plot" data for legendre polynoms snprintf(fp, sizeof(fp), output_l_dat, N[i]-1); printf("Opening file %s...\n", fp); file = fopen(fp, "w+"); double r = -1.0, dr = 0.01; while (r <= 1.0) { double sum = 0; for (k = 0; k < x.N; k++) { sum += VectorGET(x,k) * legendre_polynom(k,r); } fprintf(file, "%f %e\n", r, sum); r += dr; } fclose(file); file = NULL; } } int main() { //a1(); //a2(); a3(); return 0; }
int main(void) { /* Local scalars */ lapack_int itype, itype_i; char uplo, uplo_i; lapack_int n, n_i; lapack_int lda, lda_i; lapack_int lda_r; lapack_int ldb, ldb_i; lapack_int ldb_r; lapack_int info, info_i; lapack_int i; int failed; /* Local arrays */ lapack_complex_float *a = NULL, *a_i = NULL; lapack_complex_float *b = NULL, *b_i = NULL; lapack_complex_float *a_save = NULL; lapack_complex_float *a_r = NULL; lapack_complex_float *b_r = NULL; /* Iniitialize the scalar parameters */ init_scalars_chegst( &itype, &uplo, &n, &lda, &ldb ); lda_r = n+2; ldb_r = n+2; itype_i = itype; uplo_i = uplo; n_i = n; lda_i = lda; ldb_i = ldb; /* Allocate memory for the LAPACK routine arrays */ a = (lapack_complex_float *) LAPACKE_malloc( lda*n * sizeof(lapack_complex_float) ); b = (lapack_complex_float *) LAPACKE_malloc( ldb*n * sizeof(lapack_complex_float) ); /* Allocate memory for the C interface function arrays */ a_i = (lapack_complex_float *) LAPACKE_malloc( lda*n * sizeof(lapack_complex_float) ); b_i = (lapack_complex_float *) LAPACKE_malloc( ldb*n * sizeof(lapack_complex_float) ); /* Allocate memory for the backup arrays */ a_save = (lapack_complex_float *) LAPACKE_malloc( lda*n * sizeof(lapack_complex_float) ); /* Allocate memory for the row-major arrays */ a_r = (lapack_complex_float *) LAPACKE_malloc( n*(n+2) * sizeof(lapack_complex_float) ); b_r = (lapack_complex_float *) LAPACKE_malloc( n*(n+2) * sizeof(lapack_complex_float) ); /* Initialize input arrays */ init_a( lda*n, a ); init_b( ldb*n, b ); /* Backup the ouptut arrays */ for( i = 0; i < lda*n; i++ ) { a_save[i] = a[i]; } /* Call the LAPACK routine */ chegst_( &itype, &uplo, &n, a, &lda, b, &ldb, &info ); /* Initialize input data, call the column-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a_save[i]; } for( i = 0; i < ldb*n; i++ ) { b_i[i] = b[i]; } info_i = LAPACKE_chegst_work( LAPACK_COL_MAJOR, itype_i, uplo_i, n_i, a_i, lda_i, b_i, ldb_i ); 
failed = compare_chegst( a, a_i, info, info_i, lda, n ); if( failed == 0 ) { printf( "PASSED: column-major middle-level interface to chegst\n" ); } else { printf( "FAILED: column-major middle-level interface to chegst\n" ); } /* Initialize input data, call the column-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a_save[i]; } for( i = 0; i < ldb*n; i++ ) { b_i[i] = b[i]; } info_i = LAPACKE_chegst( LAPACK_COL_MAJOR, itype_i, uplo_i, n_i, a_i, lda_i, b_i, ldb_i ); failed = compare_chegst( a, a_i, info, info_i, lda, n ); if( failed == 0 ) { printf( "PASSED: column-major high-level interface to chegst\n" ); } else { printf( "FAILED: column-major high-level interface to chegst\n" ); } /* Initialize input data, call the row-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a_save[i]; } for( i = 0; i < ldb*n; i++ ) { b_i[i] = b[i]; } LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_i, lda, a_r, n+2 ); LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, b_i, ldb, b_r, n+2 ); info_i = LAPACKE_chegst_work( LAPACK_ROW_MAJOR, itype_i, uplo_i, n_i, a_r, lda_r, b_r, ldb_r ); LAPACKE_cge_trans( LAPACK_ROW_MAJOR, n, n, a_r, n+2, a_i, lda ); failed = compare_chegst( a, a_i, info, info_i, lda, n ); if( failed == 0 ) { printf( "PASSED: row-major middle-level interface to chegst\n" ); } else { printf( "FAILED: row-major middle-level interface to chegst\n" ); } /* Initialize input data, call the row-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < lda*n; i++ ) { a_i[i] = a_save[i]; } for( i = 0; i < ldb*n; i++ ) { b_i[i] = b[i]; } /* Init row_major arrays */ LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_i, lda, a_r, n+2 ); LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, b_i, ldb, b_r, n+2 ); info_i = LAPACKE_chegst( LAPACK_ROW_MAJOR, itype_i, uplo_i, n_i, a_r, lda_r, b_r, ldb_r ); LAPACKE_cge_trans( LAPACK_ROW_MAJOR, n, n, a_r, n+2, a_i, 
lda ); failed = compare_chegst( a, a_i, info, info_i, lda, n ); if( failed == 0 ) { printf( "PASSED: row-major high-level interface to chegst\n" ); } else { printf( "FAILED: row-major high-level interface to chegst\n" ); } /* Release memory */ if( a != NULL ) { LAPACKE_free( a ); } if( a_i != NULL ) { LAPACKE_free( a_i ); } if( a_r != NULL ) { LAPACKE_free( a_r ); } if( a_save != NULL ) { LAPACKE_free( a_save ); } if( b != NULL ) { LAPACKE_free( b ); } if( b_i != NULL ) { LAPACKE_free( b_i ); } if( b_r != NULL ) { LAPACKE_free( b_r ); } return 0; }
int main(void) { /* Local scalars */ char trans, trans_i; lapack_int n, n_i; lapack_int kl, kl_i; lapack_int ku, ku_i; lapack_int nrhs, nrhs_i; lapack_int ldab, ldab_i; lapack_int ldab_r; lapack_int ldb, ldb_i; lapack_int ldb_r; lapack_int info, info_i; lapack_int i; int failed; /* Local arrays */ lapack_complex_double *ab = NULL, *ab_i = NULL; lapack_int *ipiv = NULL, *ipiv_i = NULL; lapack_complex_double *b = NULL, *b_i = NULL; lapack_complex_double *b_save = NULL; lapack_complex_double *ab_r = NULL; lapack_complex_double *b_r = NULL; /* Iniitialize the scalar parameters */ init_scalars_zgbtrs( &trans, &n, &kl, &ku, &nrhs, &ldab, &ldb ); ldab_r = n+2; ldb_r = nrhs+2; trans_i = trans; n_i = n; kl_i = kl; ku_i = ku; nrhs_i = nrhs; ldab_i = ldab; ldb_i = ldb; /* Allocate memory for the LAPACK routine arrays */ ab = (lapack_complex_double *) LAPACKE_malloc( ldab*n * sizeof(lapack_complex_double) ); ipiv = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); b = (lapack_complex_double *) LAPACKE_malloc( ldb*nrhs * sizeof(lapack_complex_double) ); /* Allocate memory for the C interface function arrays */ ab_i = (lapack_complex_double *) LAPACKE_malloc( ldab*n * sizeof(lapack_complex_double) ); ipiv_i = (lapack_int *)LAPACKE_malloc( n * sizeof(lapack_int) ); b_i = (lapack_complex_double *) LAPACKE_malloc( ldb*nrhs * sizeof(lapack_complex_double) ); /* Allocate memory for the backup arrays */ b_save = (lapack_complex_double *) LAPACKE_malloc( ldb*nrhs * sizeof(lapack_complex_double) ); /* Allocate memory for the row-major arrays */ ab_r = (lapack_complex_double *) LAPACKE_malloc( ((2*kl+ku+1)*(n+2)) * sizeof(lapack_complex_double) ); b_r = (lapack_complex_double *) LAPACKE_malloc( n*(nrhs+2) * sizeof(lapack_complex_double) ); /* Initialize input arrays */ init_ab( ldab*n, ab ); init_ipiv( n, ipiv ); init_b( ldb*nrhs, b ); /* Backup the ouptut arrays */ for( i = 0; i < ldb*nrhs; i++ ) { b_save[i] = b[i]; } /* Call the LAPACK routine */ zgbtrs_( &trans, &n, &kl, &ku, 
&nrhs, ab, &ldab, ipiv, b, &ldb, &info ); /* Initialize input data, call the column-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < ldab*n; i++ ) { ab_i[i] = ab[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } info_i = LAPACKE_zgbtrs_work( LAPACK_COL_MAJOR, trans_i, n_i, kl_i, ku_i, nrhs_i, ab_i, ldab_i, ipiv_i, b_i, ldb_i ); failed = compare_zgbtrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major middle-level interface to zgbtrs\n" ); } else { printf( "FAILED: column-major middle-level interface to zgbtrs\n" ); } /* Initialize input data, call the column-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < ldab*n; i++ ) { ab_i[i] = ab[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } info_i = LAPACKE_zgbtrs( LAPACK_COL_MAJOR, trans_i, n_i, kl_i, ku_i, nrhs_i, ab_i, ldab_i, ipiv_i, b_i, ldb_i ); failed = compare_zgbtrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: column-major high-level interface to zgbtrs\n" ); } else { printf( "FAILED: column-major high-level interface to zgbtrs\n" ); } /* Initialize input data, call the row-major middle-level * interface to LAPACK routine and check the results */ for( i = 0; i < ldab*n; i++ ) { ab_i[i] = ab[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } LAPACKE_zge_trans( LAPACK_COL_MAJOR, 2*kl+ku+1, n, ab_i, ldab, ab_r, n+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); info_i = LAPACKE_zgbtrs_work( LAPACK_ROW_MAJOR, trans_i, n_i, kl_i, ku_i, nrhs_i, ab_r, ldab_r, ipiv_i, b_r, ldb_r ); LAPACKE_zge_trans( LAPACK_ROW_MAJOR, n, nrhs, b_r, nrhs+2, b_i, ldb ); failed = compare_zgbtrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major middle-level 
interface to zgbtrs\n" ); } else { printf( "FAILED: row-major middle-level interface to zgbtrs\n" ); } /* Initialize input data, call the row-major high-level * interface to LAPACK routine and check the results */ for( i = 0; i < ldab*n; i++ ) { ab_i[i] = ab[i]; } for( i = 0; i < n; i++ ) { ipiv_i[i] = ipiv[i]; } for( i = 0; i < ldb*nrhs; i++ ) { b_i[i] = b_save[i]; } /* Init row_major arrays */ LAPACKE_zge_trans( LAPACK_COL_MAJOR, 2*kl+ku+1, n, ab_i, ldab, ab_r, n+2 ); LAPACKE_zge_trans( LAPACK_COL_MAJOR, n, nrhs, b_i, ldb, b_r, nrhs+2 ); info_i = LAPACKE_zgbtrs( LAPACK_ROW_MAJOR, trans_i, n_i, kl_i, ku_i, nrhs_i, ab_r, ldab_r, ipiv_i, b_r, ldb_r ); LAPACKE_zge_trans( LAPACK_ROW_MAJOR, n, nrhs, b_r, nrhs+2, b_i, ldb ); failed = compare_zgbtrs( b, b_i, info, info_i, ldb, nrhs ); if( failed == 0 ) { printf( "PASSED: row-major high-level interface to zgbtrs\n" ); } else { printf( "FAILED: row-major high-level interface to zgbtrs\n" ); } /* Release memory */ if( ab != NULL ) { LAPACKE_free( ab ); } if( ab_i != NULL ) { LAPACKE_free( ab_i ); } if( ab_r != NULL ) { LAPACKE_free( ab_r ); } if( ipiv != NULL ) { LAPACKE_free( ipiv ); } if( ipiv_i != NULL ) { LAPACKE_free( ipiv_i ); } if( b != NULL ) { LAPACKE_free( b ); } if( b_i != NULL ) { LAPACKE_free( b_i ); } if( b_r != NULL ) { LAPACKE_free( b_r ); } if( b_save != NULL ) { LAPACKE_free( b_save ); } return 0; }