/* //////////////////////////////////////////////////////////////////////////// -- Testing csymmetrize Code is very similar to testing_ctranspose.cpp */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time; float error, work[1]; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *h_A, *h_R; magmaFloatComplex *d_A; magma_int_t N, size, lda, ldda; magma_int_t ione = 1; magma_opts opts; parse_opts( argc, argv, &opts ); printf(" N CPU GByte/s (sec) GPU GByte/s (sec) check\n"); printf("=====================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; lda = N; ldda = ((N+31)/32)*32; size = lda*N; // load strictly lower triangle, save strictly upper triangle gbytes = sizeof(magmaFloatComplex) * 1.*N*(N-1) / 1e9; TESTING_MALLOC( h_A, magmaFloatComplex, size ); TESTING_MALLOC( h_R, magmaFloatComplex, size ); TESTING_DEVALLOC( d_A, magmaFloatComplex, ldda*N ); /* Initialize the matrix */ for( int j = 0; j < N; ++j ) { for( int i = 0; i < N; ++i ) { h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j ); } } /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_csetmatrix( N, N, h_A, lda, d_A, ldda ); gpu_time = magma_sync_wtime( 0 ); //magmablas_csymmetrize( opts.uplo, N-2, d_A+1+ldda, ldda ); // inset by 1 row & col magmablas_csymmetrize( opts.uplo, N, d_A, ldda ); gpu_time = magma_sync_wtime( 0 ) - gpu_time; gpu_perf = gbytes / gpu_time; /* ===================================================================== Performs operation using naive in-place algorithm (LAPACK doesn't implement symmetrize) =================================================================== */ cpu_time = magma_wtime(); //for( int j = 1; j < N-1; ++j ) { // inset by 1 row & col // for( int i = 1; i < j; ++i ) { for( int j = 0; j < N; ++j ) { for( int i = 0; i < j; ++i ) { if ( opts.uplo == MagmaLower ) { h_A[i + j*lda] = MAGMA_C_CNJG( h_A[j + i*lda] ); } else { h_A[j + i*lda] = MAGMA_C_CNJG( h_A[i + j*lda] ); } } } cpu_time = magma_wtime() - cpu_time; cpu_perf = gbytes / cpu_time; /* ===================================================================== Check the result =================================================================== */ magma_cgetmatrix( N, N, d_A, ldda, h_R, lda ); blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione); error = lapackf77_clange("f", &N, &N, h_R, &lda, work); printf("%5d %7.2f (%7.2f) %7.2f (%7.2f) %s\n", (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time, (error == 0. ? "ok" : "failed") ); TESTING_FREE( h_A ); TESTING_FREE( h_R ); TESTING_DEVFREE( d_A ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return 0; }
int main(int argc, char *argv[]) { size_t memsize = 0; /* at first in MB, limited to 4Gb on 32 bits */ int pagesize; int i; int lc; char *msg; char *p, *bigmalloc; int loop_count; /* limited to 16Go on 32 bits systems */ pagesize = sysconf(_SC_PAGESIZE); if ((msg = parse_opts(argc, argv, options, help)) != (char *)NULL) tst_brkm(TBROK, cleanup, "OPTION PARSING ERROR - %s", msg); if (m_opt) { memsize = (size_t) atoi(m_copt) * 1024 * 1024; if (memsize < 1) { tst_brkm(TBROK, cleanup, "Invalid arg for -m: %s", m_copt); } } if (r_opt) srand(time(NULL)); setup(); for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; if (!m_opt) { /* find out by ourselves! */ memsize = get_memsize(); if (memsize < 1) { tst_brkm(TBROK, cleanup, "Unable to guess maxmemsize from /proc/meminfo"); } } /* Allocate (virtual) memory */ bigmalloc = p = malloc(memsize); if (!p) { tst_resm(TFAIL, "malloc - alloc of %zuMB failed", memsize / 1024 / 1024); cleanup(); } /* * Dirty all the pages, to force physical RAM allocation * and exercise eventually the swapper */ tst_resm(TINFO, "touching %zuMB of malloc'ed memory (%s)", memsize / 1024 / 1024, r_opt ? "random" : "linear"); loop_count = memsize / pagesize; for (i = 0; i < loop_count; i++) { if (v_opt && (i % (PROGRESS_LEAP * 1024 / pagesize) == 0)) { printf("."); fflush(stdout); } /* * Make the page dirty, * and make sure compiler won't optimize it away * Touching more than one word per page is useless * because of cache. */ *(int *)p = 0xdeadbeef ^ i; if (r_opt) { p = bigmalloc + (size_t) ((double)(memsize - sizeof(int)) * rand() / (RAND_MAX + 1.0)); } else { p += pagesize; } } if (v_opt) printf("\n"); /* This is not mandatory (except in a loop), but it exercise mm again */ free(bigmalloc); /* * seems that if the malloc'ed area was bad, we'd get SEGV (or kicked * somehow by the OOM killer?), hence we can indicate a PASS. */ tst_resm(TPASS, "malloc - alloc of %zuMB succeeded", memsize / 1024 / 1024); } cleanup(); return 0; }
int main(int ac, char **av) { int lc; char *msg; void check_functionality(void); int r_val; if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); setup(); /* global setup */ for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; /* get the name of the test dirctory */ if ((temp_dir = (getcwd(temp_dir, 0))) == NULL) tst_brkm(TBROK, cleanup, "getcwd failed"); /* * create a new directory and open it */ if ((r_val = mkdir(TEST_DIR, MODES)) == -1) tst_brkm(TBROK, cleanup, "mkdir failed"); if ((fd = open(TEST_DIR, O_RDONLY)) == -1) tst_brkm(TBROK, cleanup, "open of directory failed"); TEST(fchdir(fd)); if (TEST_RETURN == -1) tst_brkm(TFAIL | TTERRNO, cleanup, "fchdir call failed"); else { if (STD_FUNCTIONAL_TEST) check_functionality(); else tst_resm(TPASS, "call succeeded"); } /* * clean up things in case we are looping */ /* * NOTE: in case of failure here, we need to use "tst_resm()" * and not "tst_brkm()". This is because if we get to this * point, we have already set a PASS or FAIL for the test * and "tst_brkm()" won't report as we might expect. */ /* chdir back to our temporary work directory */ if ((r_val = chdir("..")) == -1) tst_resm(TBROK | TERRNO, "chdir failed"); if ((r_val = rmdir(TEST_DIR)) == -1) tst_resm(TBROK | TERRNO, "rmdir failed"); free(temp_dir); temp_dir = NULL; } cleanup(); tst_exit(); }
int main(int ac, char **av) { int lc; char *msg; int ind; int offset; /*************************************************************** * parse standard options ***************************************************************/ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); } /*************************************************************** * perform global setup for test ***************************************************************/ setup(); /* set the expected errnos... */ TEST_EXP_ENOS(exp_enos); /*************************************************************** * check looping state if -c option given ***************************************************************/ for (lc = 0; TEST_LOOPING(lc); lc++) { tst_count = 0; offset = (lc % 100) * 4096; /* max size is 100 blocks */ for (ind = 0; Whence[ind] >= 0; ind++) { /* * Call lseek(2) */ TEST(lseek(Fd, (long)offset, Whence[ind])); /* check return code */ if (TEST_RETURN == -1) { TEST_ERROR_LOG(TEST_ERRNO); tst_resm(TFAIL, "lseek(%s, %d, 0) Failed, errno=%d : %s", Fname, offset, TEST_ERRNO, strerror(TEST_ERRNO)); } else { /*************************************************************** * only perform functional verification if flag set (-f not given) ***************************************************************/ if (STD_FUNCTIONAL_TEST) { /* No Verification test, yet... */ tst_resm(TPASS, "lseek(%s, %d, %d) returned %ld", Fname, offset, Whence[ind], TEST_RETURN); } else tst_count++; } } } /*************************************************************** * cleanup and exit ***************************************************************/ cleanup(); tst_exit(); }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaDoubleComplex *A, *B; double diff, error; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t m, n, k, size, maxn, ld; magmaDoubleComplex x2_m, x2_c; // complex x for magma, cblas/fortran blas respectively double x_m, x_c; // x for magma, cblas/fortran blas respectively magma_opts opts; parse_opts( argc, argv, &opts ); opts.tolerance = max( 100., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); gTol = tol; printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "\n" "Diff compares MAGMA wrapper to CBLAS and BLAS function; should be exactly 0.\n" "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n" "\n" ); double total_diff = 0.; double total_error = 0.; int inc[] = { 1 }; //{ -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; int ninc = sizeof(inc)/sizeof(*inc); for( int itest = 0; itest < opts.ntest; ++itest ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; printf("=========================================================================\n"); printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n", (int) m, (int) n, (int) k, (int) incx, (int) incy ); printf( "Function MAGMA CBLAS BLAS Diff Error\n" " msec msec msec\n" ); // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) ); ld = max( 1, maxn ); size = ld*maxn; magma_zmalloc_pinned( &A, size ); assert( A != NULL ); magma_zmalloc_pinned( &B, size ); assert( B != NULL ); // initialize matrices lapackf77_zlarnv( &ione, ISEED, &size, A ); lapackf77_zlarnv( &ione, ISEED, &size, B ); printf( "Level 1 BLAS ----------------------------------------------------------\n" ); // ----- test DZASUM // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dzasum( m, A(0,j), incx ); x_c = cblas_dzasum( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dzasum( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dzasum", diff, error ); total_diff += diff; total_error += error; } // ----- test DZNRM2 // get two-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dznrm2( m, A(0,j), incx ); x_c = cblas_dznrm2( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dznrm2( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dznrm2", diff, error ); total_diff += diff; total_error += error; } // ----- test ZDOTC // dot columns, Aj^H Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotc", diff, error ); total_diff += diff; total_error += error; total_error += error; // ----- test ZDOTU // dot columns, Aj^T * Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotu", diff, error ); total_diff += diff; total_error += error; // tell user about disabled functions #if defined( MAGMA_WITH_MKL ) printf( "cblas_zdotc and cblas_zdotu disabled with MKL (segfaults)\n" ); #endif #if defined( __APPLE__ ) printf( "blasf77_zdotc and blasf77_zdotu disabled on MacOS (segfaults)\n" ); #endif // cleanup magma_free_pinned( A ); magma_free_pinned( B ); fflush( stdout ); }}} // itest, incx, incy // TODO use average error? printf( "sum diffs = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n" "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n", total_diff, total_error ); if ( total_diff != 0. ) { printf( "some tests failed diff == 0.; see above.\n" ); } else { printf( "all tests passed diff == 0.\n" ); } TESTING_FINALIZE(); int status = (total_diff != 0.); return status; }
static void report_to_kerneloops( const char *dump_dir_name, map_string_t *settings) { problem_data_t *problem_data = create_problem_data_for_reporting(dump_dir_name); if (!problem_data) xfunc_die(); /* create_problem_data_for_reporting already emitted error msg */ const char *backtrace = problem_data_get_content_or_NULL(problem_data, FILENAME_BACKTRACE); if (!backtrace) error_msg_and_die("Error sending kernel oops due to missing backtrace"); const char *env = getenv("KerneloopsReporter_SubmitURL"); const char *submitURL = (env ? env : get_map_string_item_or_empty(settings, "SubmitURL")); if (!submitURL[0]) submitURL = "http://oops.kernel.org/submitoops.php"; log(_("Submitting oops report to %s"), submitURL); CURLcode ret = http_post_to_kerneloops_site(submitURL, backtrace); if (ret != CURLE_OK) error_msg_and_die("Kernel oops has not been sent due to %s", curl_easy_strerror(ret)); problem_data_free(problem_data); /* Server replies with: * 200 thank you for submitting the kernel oops information * RemoteIP: 34192fd15e34bf60fac6a5f01bba04ddbd3f0558 * - no URL or bug ID apparently... */ struct dump_dir *dd = dd_opendir(dump_dir_name, /*flags:*/ 0); if (dd) { report_result_t rr = { .label = (char *)"kerneloops" }; rr.url = (char *)submitURL; add_reported_to_entry(dd, &rr); dd_close(dd); } log("Kernel oops report was uploaded"); } int main(int argc, char **argv) { abrt_init(argv); /* I18n */ setlocale(LC_ALL, ""); #if ENABLE_NLS bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); #endif map_string_t *settings = new_map_string(); const char *dump_dir_name = "."; GList *conf_file = NULL; /* Can't keep these strings/structs static: _() doesn't support that */ const char *program_usage_string = _( "& [-v] [-c CONFFILE]... -d DIR\n" "\n" "Reports kernel oops to kerneloops.org (or similar) site.\n" "\n" "Files with names listed in $EXCLUDE_FROM_REPORT are not included\n" "into the tarball.\n" "\n" "CONFFILE lines should have 'PARAM = VALUE' format.\n" "Recognized string parameter: SubmitURL.\n" "Parameter can be overridden via $KerneloopsReporter_SubmitURL." ); enum { OPT_v = 1 << 0, OPT_d = 1 << 1, OPT_c = 1 << 2, }; /* Keep enum above and order of options below in sync! */ struct options program_options[] = { OPT__VERBOSE(&g_verbose), OPT_STRING('d', NULL, &dump_dir_name, "DIR" , _("Problem directory")), OPT_LIST( 'c', NULL, &conf_file , "FILE", _("Configuration file")), OPT_END() }; /*unsigned opts =*/ parse_opts(argc, argv, program_options, program_usage_string); export_abrt_envvars(0); while (conf_file) { char *fn = (char *)conf_file->data; log_notice("Loading settings from '%s'", fn); load_conf_file(fn, settings, /*skip key w/o values:*/ false); log_debug("Loaded '%s'", fn); conf_file = g_list_remove(conf_file, fn); } report_to_kerneloops(dump_dir_name, settings); free_map_string(settings); return 0; }
/*ARGSUSED*/ int main(int argc, char **argv) { register int i, j, ok, pid; int count, child, status, nwait; #ifdef UCLINUX char *msg; if ((msg = parse_opts(argc, argv, (option_t *)NULL, NULL)) != (char *)NULL){ tst_brkm(TBROK, cleanup, "OPTION PARSING ERROR - %s", msg); } argv0 = argv[0]; maybe_run_child(&do_child, "dS", &id_uclinux, &maxsemstring); #endif prog = argv[0]; nwait = 0; setup(); /*--------------------------------------------------------------*/ srand(getpid()); tid = -1; for (i = 0; i < NPROCS; i++) { do { keyarray[i] = (key_t)rand(); if (keyarray[i] == IPC_PRIVATE) { ok = 0; continue; } ok = 1; for (j = 0; j < i; j++) { if (keyarray[j] == keyarray[i]) { ok = 0; break; } } } while (ok == 0); } if ((signal(SIGTERM, term)) == SIG_ERR) { tst_resm(TFAIL, "\tsignal failed. errno = %d", errno); tst_exit(); } for (i = 0; i < NPROCS; i++) { if ((pid = FORK_OR_VFORK()) < 0) { tst_resm(TFAIL, "\tFork failed (may be OK if under stress)"); tst_exit(); } if (pid == 0) { procstat = 1; dotest(keyarray[i]); exit(0); } pidarray[i] = pid; nwait++; } /* * Wait for children to finish. */ count = 0; while((child = wait(&status)) > 0) { if (status) { tst_resm(TFAIL, "%s[%d] Test failed. exit=0x%x", prog, child, status); local_flag = FAILED; } ++count; } /* * Should have collected all children. */ if (count != nwait) { tst_resm(TFAIL, "\tWrong # children waited on, count = %d", count); local_flag = FAILED; } if (local_flag != FAILED) tst_resm(TPASS, "semctl06 ran successfully!"); else tst_resm(TFAIL, "semctl06 failed"); /*--------------------------------------------------------------*/ /* Clean up any files created by test before call to anyfail. */ cleanup (); return (0); /* shut lint up */ }
int main(int ac, char **av) { int lc; /* loop counter */ char *msg; /* message returned from parse_opts */ int incr; /* increment */ long nbrkpt; /* new brk point value */ long cur_brk_val; /* current size returned by sbrk */ long aft_brk_val; /* current size returned by sbrk */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); setup(); /* * Attempt to control how fast we get to test max size. * Every MAX_SIZE_LC'th lc will be fastest test will reach max size. */ incr = (Max_brk_byte_size - Beg_brk_val) / (MAX_SIZE_LC / 2); if ((incr * 2) < 4096) /* make sure that process will grow */ incr += 4096 / 2; for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; /* * Determine new value to give brk * Every even lc value, grow by 2 incr and * every odd lc value, strink by one incr. * If lc is equal to 3, no change, special case. */ cur_brk_val = (long)sbrk(0); if (lc == 3) { nbrkpt = cur_brk_val; /* no change, special one time case */ } else if ((lc % 2) == 0) { /* * grow */ nbrkpt = cur_brk_val + (2 * incr); if (nbrkpt > Max_brk_byte_size) nbrkpt = Beg_brk_val; /* start over */ } else { /* * shrink */ nbrkpt = cur_brk_val - incr; } /**** printf("cur_brk_val = %d, nbrkpt = %d, incr = %d, lc = %d\n", cur_brk_val, nbrkpt, incr, lc); ****/ /* * Call brk(2) */ TEST(brk((char *)nbrkpt)); /* check return code */ if (TEST_RETURN == -1) { aft_brk_val = (long)sbrk(0); tst_resm(TFAIL|TTERRNO, "brk(%ld) failed (size before %ld, after %ld)", nbrkpt, cur_brk_val, aft_brk_val); } else { if (STD_FUNCTIONAL_TEST) { aft_brk_val = (long)sbrk(0); if (aft_brk_val == nbrkpt) { tst_resm(TPASS, "brk(%ld) returned %ld, new size verified by sbrk", nbrkpt, TEST_RETURN); } else { tst_resm(TFAIL, "brk(%ld) returned %ld, sbrk before %ld, after %ld", nbrkpt, TEST_RETURN, cur_brk_val, aft_brk_val); } } } } cleanup(); tst_exit(); }
int main(int ac, char **av) { int lc; char *msg; int results; /* Disable test if the version of the kernel is less than 2.6.17 */ if (((results = tst_kvercmp(2, 6, 17)) < 0)) { tst_resm(TWARN, "This test can only run on kernels that are "); tst_resm(TWARN, "2.6.17 and higher"); exit(0); } /* * parse standard options */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); /* * perform global setup for test */ setup(); /* * check if the current filesystem is nfs */ if (tst_is_cwd_nfs()) { tst_brkm(TCONF, cleanup, "Cannot do tee on a file located on an NFS filesystem"); } /* * check looping state if -c option given */ for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; /* * Call tee_test */ TEST(tee_test()); /* check return code */ if (TEST_RETURN < 0) { if (TEST_RETURN != -1) { TEST_ERRNO = -TEST_RETURN; } TEST_ERROR_LOG(TEST_ERRNO); tst_resm(TFAIL, "tee() Failed, errno=%d : %s", TEST_ERRNO, strerror(TEST_ERRNO)); } else { /* * only perform functional verification if flag set (-f not given) */ if (STD_FUNCTIONAL_TEST) { /* No Verification test, yet... */ tst_resm(TPASS, "tee() returned %ld", TEST_RETURN); } } } /* * cleanup and exit */ cleanup(); return (0); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zgesv */ int main(int argc, char **argv) { TESTING_INIT(); real_Double_t gflops, cpu_perf, cpu_time, gpu_perf, gpu_time; double error, Rnorm, Anorm, Xnorm, *work; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex *h_A, *h_LU, *h_B, *h_X; magma_int_t *ipiv; magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); nrhs = opts.nrhs; printf("ngpu %d\n", (int) opts.ngpu ); printf(" N NRHS CPU Gflop/s (sec) GPU GFlop/s (sec) ||B - AX|| / N*||A||*||X||\n"); printf("================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldb = lda; gflops = ( FLOPS_ZGETRF( N, N ) + FLOPS_ZGETRS( N, nrhs ) ) / 1e9; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_CPU( h_LU, magmaDoubleComplex, lda*N ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, ldb*nrhs ); TESTING_MALLOC_CPU( h_X, magmaDoubleComplex, ldb*nrhs ); TESTING_MALLOC_CPU( work, double, N ); TESTING_MALLOC_CPU( ipiv, magma_int_t, N ); /* Initialize the matrices */ sizeA = lda*N; sizeB = ldb*nrhs; lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); // copy A to LU and B to X; save A and B for residual lapackf77_zlacpy( "F", &N, &N, h_A, &lda, h_LU, &lda ); lapackf77_zlacpy( "F", &N, &nrhs, h_B, &ldb, h_X, &ldb ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ gpu_time = magma_wtime(); magma_zgesv( N, nrhs, h_LU, lda, ipiv, h_X, ldb, opts.queues2, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); //===================================================================== // Residual //===================================================================== Anorm = lapackf77_zlange("I", &N, &N, h_A, &lda, work); Xnorm = lapackf77_zlange("I", &N, &nrhs, h_X, &ldb, work); blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N, &c_one, h_A, &lda, h_X, &ldb, &c_neg_one, h_B, &ldb); Rnorm = lapackf77_zlange("I", &N, &nrhs, h_B, &ldb, work); error = Rnorm/(N*Anorm*Xnorm); status += ! (error < tol); /* ==================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_zgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_zgesv returned error %d: %s.\n", (int) info, magma_strerror( info )); printf( "%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } else { printf( "%5d %5d --- ( --- ) %7.2f (%7.2f) %8.2e %s\n", (int) N, (int) nrhs, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed")); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_LU ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_X ); TESTING_FREE_CPU( work ); TESTING_FREE_CPU( ipiv ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing cunmqr */ int main( int argc, char** argv ) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; float error, work[1]; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magma_int_t ione = 1; magma_int_t mm, m, n, k, size, info; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t nb, ldc, lda, lwork, lwork_max; magmaFloatComplex *C, *R, *A, *W, *tau; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); // need slightly looser bound (60*eps instead of 30*eps) for some tests opts.tolerance = max( 60., opts.tolerance ); float tol = opts.tolerance * lapackf77_slamch("E"); // test all combinations of input parameters magma_side_t side [] = { MagmaLeft, MagmaRight }; magma_trans_t trans[] = { MagmaConjTrans, MagmaNoTrans }; printf(" M N K side trans CPU GFlop/s (sec) GPU GFlop/s (sec) ||R||_F / ||QC||_F\n"); printf("===============================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iside = 0; iside < 2; ++iside ) { for( int itran = 0; itran < 2; ++itran ) { for( int iter = 0; iter < opts.niter; ++iter ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; nb = magma_get_cgeqrf_nb( m ); ldc = m; // A is m x k (left) or n x k (right) mm = (side[iside] == MagmaLeft ? m : n); lda = mm; gflops = FLOPS_CUNMQR( m, n, k, side[iside] ) / 1e9; if ( side[iside] == MagmaLeft && m < k ) { printf( "%5d %5d %5d %4c %5c skipping because side=left and m < k\n", (int) m, (int) n, (int) k, lapacke_side_const( side[iside] ), lapacke_trans_const( trans[itran] ) ); continue; } if ( side[iside] == MagmaRight && n < k ) { printf( "%5d %5d %5d %4c %5c skipping because side=right and n < k\n", (int) m, (int) n, (int) k, lapacke_side_const( side[iside] ), lapacke_trans_const( trans[itran] ) ); continue; } // need at least 2*nb*nb for geqrf lwork_max = max( max( m*nb, n*nb ), 2*nb*nb ); TESTING_MALLOC_CPU( C, magmaFloatComplex, ldc*n ); TESTING_MALLOC_CPU( R, magmaFloatComplex, ldc*n ); TESTING_MALLOC_CPU( A, magmaFloatComplex, lda*k ); TESTING_MALLOC_CPU( W, magmaFloatComplex, lwork_max ); TESTING_MALLOC_CPU( tau, magmaFloatComplex, k ); // C is full, m x n size = ldc*n; lapackf77_clarnv( &ione, ISEED, &size, C ); lapackf77_clacpy( "Full", &m, &n, C, &ldc, R, &ldc ); size = lda*k; lapackf77_clarnv( &ione, ISEED, &size, A ); // compute QR factorization to get Householder vectors in A, tau magma_cgeqrf( mm, k, A, lda, tau, W, lwork_max, &info ); if (info != 0) printf("magma_cgeqrf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ cpu_time = magma_wtime(); lapackf77_cunmqr( lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ), &m, &n, &k, A, &lda, tau, C, &ldc, W, &lwork_max, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_cunmqr returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ==================================================================== Performs operation using MAGMA =================================================================== */ // query for workspace size lwork = -1; magma_cunmqr( side[iside], trans[itran], m, n, k, A, lda, tau, R, ldc, W, lwork, &info ); if (info != 0) printf("magma_cunmqr (lwork query) returned error %d: %s.\n", (int) info, magma_strerror( info )); lwork = (magma_int_t) MAGMA_C_REAL( W[0] ); if ( lwork < 0 || lwork > lwork_max ) { printf("optimal lwork %d > lwork_max %d\n", (int) lwork, (int) lwork_max ); lwork = lwork_max; } gpu_time = magma_wtime(); magma_cunmqr( side[iside], trans[itran], m, n, k, A, lda, tau, R, ldc, W, lwork, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cunmqr returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== compute relative error |QC_magma - QC_lapack| / |QC_lapack| =================================================================== */ error = lapackf77_clange( "Fro", &m, &n, C, &ldc, work ); size = ldc*n; blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione ); error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / error; printf( "%5d %5d %5d %4c %5c %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) m, (int) n, (int) k, lapacke_side_const( side[iside] ), lapacke_trans_const( trans[itran] ), cpu_perf, cpu_time, gpu_perf, gpu_time, error, (error < tol ? "ok" : "failed") ); status += ! (error < tol); TESTING_FREE_CPU( C ); TESTING_FREE_CPU( R ); TESTING_FREE_CPU( A ); TESTING_FREE_CPU( W ); TESTING_FREE_CPU( tau ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } }} // end iside, itran printf( "\n" ); } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zhetrd_he2hb */ int main( int argc, char** argv) { TESTING_INIT_MGPU(); real_Double_t gflops, gpu_time, gpu_perf; magmaDoubleComplex *h_A, *h_R, *h_work; magmaDoubleComplex *tau; double *D, *E; magma_int_t N, n2, lda, ldda, lwork, ldt, info, nstream; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; // TODO add these options to parse_opts magma_int_t NE = 0; magma_int_t distblk = 0; magma_opts opts; parse_opts( argc, argv, &opts ); magma_int_t WANTZ = (opts.jobz == MagmaVec); double tol = opts.tolerance * lapackf77_dlamch("E"); if (opts.nb == 0) opts.nb = 64; //magma_get_zhetrd_he2hb_nb(N); if (NE < 1) NE = N; //64; //magma_get_zhetrd_he2hb_nb(N); nstream = max(3, opts.ngpu+2); magma_queue_t streams[MagmaMaxGPUs][20]; magmaDoubleComplex *da[MagmaMaxGPUs], *dT1[MagmaMaxGPUs]; if ((distblk == 0) || (distblk < opts.nb)) distblk = max(256, opts.nb); printf("voici ngpu %d distblk %d NB %d nstream %d\n ", (int) opts.ngpu, (int) distblk, (int) opts.nb, (int) nstream); for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) { magma_setdevice( dev ); for( int i = 0; i < nstream; ++i ) { magma_queue_create( &streams[dev][i] ); } } magma_setdevice( 0 ); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldt = N; ldda = ((N+31)/32)*32; n2 = N*lda; /* We suppose the magma NB is bigger than lapack NB */ lwork = N*opts.nb; //gflops = ....? /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( tau, magmaDoubleComplex, N-1 ); TESTING_MALLOC_PIN( h_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, lda*N ); TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork ); TESTING_MALLOC_PIN( D, double, N ); TESTING_MALLOC_PIN( E, double, N ); for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) { magma_int_t mlocal = ((N / distblk) / opts.ngpu + 1) * distblk; magma_setdevice( dev ); TESTING_MALLOC_DEV( da[dev], magmaDoubleComplex, ldda*mlocal ); TESTING_MALLOC_DEV( dT1[dev], magmaDoubleComplex, N*opts.nb ); } /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hermitian( N, h_A, lda ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ /* Copy the matrix to the GPU */ magma_zsetmatrix_1D_col_bcyclic( N, N, h_R, lda, da, ldda, opts.ngpu, distblk); //magmaDoubleComplex *dabis; //TESTING_MALLOC_DEV( dabis, magmaDoubleComplex, ldda*N ); //magma_zsetmatrix(N, N, h_R, lda, dabis, ldda); for (int count=0; count < 1; ++count) { magma_setdevice(0); gpu_time = magma_wtime(); if (opts.version == 30) { magma_zhetrd_he2hb_mgpu_spec( opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork, da, ldda, dT1, opts.nb, opts.ngpu, distblk, streams, nstream, opts.nthread, &info); } else { nstream = 3; magma_zhetrd_he2hb_mgpu( opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork, da, ldda, dT1, opts.nb, opts.ngpu, distblk, streams, nstream, opts.nthread, &info); } // magma_zhetrd_he2hb(opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork, dT1[0], &info); gpu_time = magma_wtime() - gpu_time; printf(" Finish BAND N %d NB %d dist %d ngpu %d version %d timing= %f\n", N, opts.nb, distblk, opts.ngpu, opts.version, gpu_time); } magma_setdevice(0); for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) { magma_setdevice(dev); cudaDeviceSynchronize(); } magma_setdevice(0); magmablasSetKernelStream( NULL ); // todo neither of these is declared in headers // magma_zhetrd_bhe2trc_v5(opts.nthread, WANTZ, opts.uplo, NE, N, opts.nb, h_R, lda, D, E, dT1[0], ldt); // magma_zhetrd_bhe2trc(opts.nthread, WANTZ, opts.uplo, NE, N, opts.nb, h_R, lda, D, E, dT1[0], ldt); // todo where is this timer started? // gpu_time = magma_wtime() - gpu_time; // todo what are the gflops? gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_zhetrd_he2hb returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Print performance and error. =================================================================== */ #if defined(CHECKEIG) #if defined(PRECISION_z) || defined(PRECISION_d) if ( opts.check ) { printf(" Total N %5d flops %6.2f timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time ); char JOBZ; if (WANTZ == 0) JOBZ = 'N'; else JOBZ = 'V'; double nrmI=0.0, nrm1=0.0, nrm2=0.0; int lwork2 = 256*N; magmaDoubleComplex *work2, *AINIT; double *rwork2, *D2; // TODO free this memory ! magma_zmalloc_cpu( &work2, lwork2 ); magma_dmalloc_cpu( &rwork2, N ); magma_dmalloc_cpu( &D2, N ); magma_zmalloc_cpu( &AINIT, N*lda ); memcpy(AINIT, h_A, N*lda*sizeof(magmaDoubleComplex)); /* ===================================================================== Performs operation using LAPACK =================================================================== */ cpu_time = magma_wtime(); int nt = min(12, opts.nthread); #if defined(USEMKL) mkl_set_num_threads(nt); #endif #if defined(USEACML) omp_set_num_threads(nt); #endif #if defined(PRECISION_z) || defined (PRECISION_c) lapackf77_zheev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2, rwork2, &info ); #else lapackf77_dsyev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2, &info ); #endif ///* call eigensolver for our resulting tridiag [D E] and for Q */ //dstedc_withZ('V', N, D, E, h_R, lda); ////dsterf_( &N, D, E, &info); cpu_time = magma_wtime() - cpu_time; printf(" Finish CHECK - EIGEN timing= %f threads %d\n", cpu_time, nt); /* compare result */ cmp_vals(N, D2, D, &nrmI, &nrm1, &nrm2); magmaDoubleComplex *WORKAJETER; double *RWORKAJETER, *RESU; // TODO free this memory ! magma_zmalloc_cpu( &WORKAJETER, (2* N * N + N) ); magma_dmalloc_cpu( &RWORKAJETER, N ); magma_dmalloc_cpu( &RESU, 10 ); int MATYPE; memset(RESU, 0, 10*sizeof(double)); MATYPE=3; double NOTHING=0.0; cpu_time = magma_wtime(); // check results zcheck_eig_( lapack_vec_const(opts.jobz), &MATYPE, &N, &opts.nb, AINIT, &lda, &NOTHING, &NOTHING, D2, D, h_R, &lda, WORKAJETER, RWORKAJETER, RESU ); cpu_time = magma_wtime() - cpu_time; printf(" Finish CHECK - results timing= %f\n", cpu_time); #if defined(USEMKL) mkl_set_num_threads(1); #endif #if defined(USEACML) omp_set_num_threads(1); #endif printf("\n"); printf(" ================================================================================================================\n"); printf(" ==> INFO voici threads=%d N=%d NB=%d WANTZ=%d\n", (int) opts.nthread, (int) N, (int) opts.nb, (int) WANTZ); printf(" ================================================================================================================\n"); printf(" DSBTRD : %15s \n", "STATblgv9withQ "); printf(" ================================================================================================================\n"); if (WANTZ > 0) printf(" | A - U S U' | / ( |A| n ulp ) : %15.3E \n", RESU[0]); if (WANTZ > 0) printf(" | I - U U' | / ( n ulp ) : %15.3E \n", RESU[1]); printf(" | D1 - EVEIGS | / (|D| ulp) : %15.3E \n", RESU[2]); printf(" max | D1 - EVEIGS | : %15.3E \n", RESU[6]); printf(" ================================================================================================================\n\n\n"); printf(" ****************************************************************************************************************\n"); printf(" * Hello here are the norm Infinite (max)=%8.2e norm one (sum)=%8.2e norm2(sqrt)=%8.2e *\n", nrmI, nrm1, nrm2); printf(" ****************************************************************************************************************\n\n"); } #endif // PRECISION_z || PRECISION_d #endif // CHECKEIG printf(" Total N %5d flops %6.2f timing %6.2f seconds\n", (int) N, 0.0, gpu_time ); printf("============================================================================\n\n\n"); TESTING_FREE_CPU( tau ); TESTING_FREE_PIN( h_A ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); TESTING_FREE_PIN( D ); TESTING_FREE_PIN( E ); for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) { magma_setdevice( dev ); TESTING_FREE_DEV( da[dev] ); TESTING_FREE_DEV( dT1[dev] ); } magma_setdevice( 0 ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) { for( int i = 0; i < nstream; ++i ) { magma_queue_destroy( streams[dev][i] ); } } TESTING_FINALIZE_MGPU(); return status; }
int main(int ac, char **av) { int lc; /* loop counter */ char *msg; /* message returned from parse_opts */ int status; /*************************************************************** * parse standard options ***************************************************************/ if ((msg = parse_opts(ac, av, (option_t *) NULL, NULL)) != (char *)NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); tst_exit(); } #ifdef UCLINUX maybe_run_child(&do_child, ""); #endif /*************************************************************** * perform global setup for test ***************************************************************/ setup(); /*************************************************************** * check looping state if -c option given ***************************************************************/ for (lc = 0; TEST_LOOPING(lc); lc++) { /* reset Tst_count in case we are looping. */ Tst_count = 0; /* make a child process so we can kill it */ /* If we cannot fork => we cannot test kill, so break and exit */ if ((fork_pid = FORK_OR_VFORK()) == -1) { tst_brkm(TBROK, cleanup, "fork() Failure. errno=%d : %s", errno, strerror(errno)); } if (fork_pid == 0) { /* CHILD */ #ifdef UCLINUX if (self_exec(av[0], "") < 0) { tst_brkm(TBROK, cleanup, "self_exec of child failed"); } #else do_child(); #endif } /* PARENT */ /* * Call kill(2) */ TEST(kill(fork_pid, SIGKILL)); /* check return code */ if (TEST_RETURN == -1) { TEST_ERROR_LOG(TEST_ERRNO); tst_resm(TFAIL, "kill(%d, SIGKILL) Failed, errno=%d : %s", fork_pid, TEST_ERRNO, strerror(TEST_ERRNO)); } else { /*************************************************************** * only perform functional verification if flag set (-f not given) ***************************************************************/ if (STD_FUNCTIONAL_TEST) { /* No Verification test, yet... */ tst_resm(TPASS, "kill(%d, SIGKILL) returned %ld", fork_pid, TEST_RETURN); } } /* * wait for process to cleanup zombies. * */ waitpid(0, &status, WNOHANG); } /* End for TEST_LOOPING */ /*************************************************************** * cleanup and exit ***************************************************************/ cleanup(); return 0; } /* End main */
/* //////////////////////////////////////////////////////////////////////////// -- Testing cungqr */ int main( int argc, char** argv ) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf, cpu_time; float Anorm, error, work[1]; magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE; magmaFloatComplex *hA, *hR, *tau, *h_work; magmaFloatComplex_ptr dA, dT; magma_int_t m, n, k; magma_int_t n2, lda, ldda, lwork, min_mn, nb, info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); opts.lapack |= opts.check; // check (-c) implies lapack (-l) printf("Running version %d; available are (specified through --version num):\n", (int) opts.version); printf("1 - uses precomputed clarft matrices (default)\n"); printf("2 - recomputes the clarft matrices on the fly\n\n"); printf(" m n k CPU GFlop/s (sec) GPU GFlop/s (sec) ||R|| / ||A||\n"); printf("=========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; if ( m < n || n < k ) { printf( "%5d %5d %5d skipping because m < n or n < k\n", (int) m, (int) n, (int) k ); continue; } lda = m; ldda = ((m + 31)/32)*32; n2 = lda*n; min_mn = min(m, n); nb = magma_get_cgeqrf_nb( m ); lwork = (m + 2*n+nb)*nb; gflops = FLOPS_CUNGQR( m, n, k ) / 1e9; TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork ); TESTING_MALLOC_PIN( hR, magmaFloatComplex, lda*n ); TESTING_MALLOC_CPU( hA, magmaFloatComplex, lda*n ); TESTING_MALLOC_CPU( tau, magmaFloatComplex, min_mn ); TESTING_MALLOC_DEV( dA, magmaFloatComplex, ldda*n ); TESTING_MALLOC_DEV( dT, magmaFloatComplex, ( 2*min_mn + ((n + 31)/32)*32 )*nb ); lapackf77_clarnv( &ione, ISEED, &n2, hA ); lapackf77_clacpy( MagmaFullStr, &m, &n, hA, &lda, hR, &lda ); Anorm = lapackf77_clange("f", &m, &n, hA, &lda, work ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ // first, get QR factors in both hA and hR // okay that magma_cgeqrf_gpu has special structure for R; R isn't used here. magma_csetmatrix( m, n, hA, lda, dA, ldda ); magma_cgeqrf_gpu( m, n, dA, ldda, tau, dT, &info ); if (info != 0) printf("magma_cgeqrf_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); magma_cgetmatrix( m, n, dA, ldda, hA, lda ); lapackf77_clacpy( MagmaFullStr, &m, &n, hA, &lda, hR, &lda ); gpu_time = magma_wtime(); if (opts.version == 1) magma_cungqr( m, n, k, hR, lda, tau, dT, nb, &info ); else magma_cungqr2(m, n, k, hR, lda, tau, &info ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_cungqr_gpu returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); lapackf77_cungqr( &m, &n, &k, hA, &lda, tau, h_work, &lwork, &info ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_cungqr returned error %d: %s.\n", (int) info, magma_strerror( info )); // compute relative error |R|/|A| := |Q_magma - Q_lapack|/|A| blasf77_caxpy( &n2, &c_neg_one, hA, &ione, hR, &ione ); error = lapackf77_clange("f", &m, &n, hR, &lda, work) / Anorm; bool okay = (error < tol); status += ! okay; printf("%5d %5d %5d %7.1f (%7.2f) %7.1f (%7.2f) %8.2e %s\n", (int) m, (int) n, (int) k, cpu_perf, cpu_time, gpu_perf, gpu_time, error, (okay ? "ok" : "failed")); } else { printf("%5d %5d %5d --- ( --- ) %7.1f (%7.2f) --- \n", (int) m, (int) n, (int) k, gpu_perf, gpu_time ); } TESTING_FREE_PIN( h_work ); TESTING_FREE_PIN( hR ); TESTING_FREE_CPU( hA ); TESTING_FREE_CPU( tau ); TESTING_FREE_DEV( dA ); TESTING_FREE_DEV( dT ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing magma_ssymm_mgpu */ int main( int argc, char** argv) { TESTING_INIT(); float c_neg_one = MAGMA_S_NEG_ONE; float calpha = MAGMA_S_MAKE( 3.456, 5.678 ); float cbeta = MAGMA_S_MAKE( 1.234, 2.456 ); real_Double_t gflops, gpu_perf=0., cpu_perf=0., gpu_time=0., cpu_time=0.; real_Double_t gpu_perf2=0., gpu_time2=0.; float error=0., errorbis=0., work[1]; float *hA, *hX, *hB, *hR; float *dA[MagmaMaxGPUs], *dX[MagmaMaxGPUs], *dB[MagmaMaxGPUs], *dwork[MagmaMaxGPUs], *hwork[MagmaMaxGPUs+1]; float *dA2; magma_int_t M, N, size, lda, ldda, msize, nb, nstream; magma_int_t ione = 1; magma_int_t iseed[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); float tol = opts.tolerance * lapackf77_slamch("E"); // default values nb = (opts.nb > 0 ? opts.nb : 64); nstream = (opts.nstream > 0 ? opts.nstream : 2); magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2]; magma_int_t nbcmplx = 0; magma_buildconnection_mgpu(gnode, &nbcmplx, opts.ngpu); printf("Initializing communication pattern... GPU-ncmplx %d\n\n", (int) nbcmplx); for (int i=0; i < nbcmplx; ++i) { int myngpu = gnode[i][MagmaMaxGPUs]; printf("cmplx %d has %d gpu ", i, myngpu); for(int j=0; j < myngpu; ++j) printf(" %d", (int) gnode[i][j]); printf("\n"); } magma_int_t nbevents = 2; magma_queue_t streams[MagmaMaxGPUs][20]; magma_event_t redevents[MagmaMaxGPUs][20]; magma_event_t redevents2[MagmaMaxGPUs][MagmaMaxGPUs*MagmaMaxGPUs+10]; for( int d = 0; d < opts.ngpu; ++d ) { for( magma_int_t i = 0; i < nstream; ++i ) { magma_queue_create( &streams[d][i] ); } for( magma_int_t i = 0; i < nbevents; ++i ) { cudaEventCreateWithFlags(&redevents[d][i], cudaEventDisableTiming); cudaEventCreateWithFlags(&redevents2[d][i], cudaEventDisableTiming); } } printf( "nb %d, ngpu %d, nstream %d version %d\n", (int) nb, (int) opts.ngpu, (int) nstream, (int) opts.version ); printf(" M N nb offset CPU GFlop/s (sec) GPU GFlop/s (sec) CUBLAS hemm (sec) ||R|| / ||A||*||X||\n"); printf("=========================================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { M = opts.msize[itest]; N = opts.nsize[itest]; for( int offset = 0; offset < N; offset += min(N,nb) ) { for( int iter = 0; iter < opts.niter; ++iter ) { msize = M - offset; lda = M; ldda = ((M + 31)/32)*32; size = lda*M; gflops = FLOPS_SSYMM( MagmaLeft, (float)msize, (float)N ) / 1e9; magma_int_t dworksiz = ldda*N*3; magma_int_t hworksiz = lda*N; TESTING_MALLOC_CPU( hA, float, lda*M ); TESTING_MALLOC_CPU( hX, float, lda*N ); TESTING_MALLOC_CPU( hB, float, lda*N ); TESTING_MALLOC_PIN( hR, float, lda*N ); for( int d = 0; d < opts.ngpu; ++d ) { magma_int_t mlocal = ((M / nb) / opts.ngpu + 1) * nb; magma_setdevice( d ); TESTING_MALLOC_DEV( dA[d], float, ldda*mlocal ); TESTING_MALLOC_DEV( dX[d], float, ldda*N ); TESTING_MALLOC_DEV( dB[d], float, ldda*N ); TESTING_MALLOC_DEV( dwork[d], float, dworksiz ); TESTING_MALLOC_PIN( hwork[d], float, hworksiz ); } TESTING_MALLOC_PIN( hwork[opts.ngpu], float, lda*N ); if ( opts.check ) { magma_setdevice( 0 ); TESTING_MALLOC_DEV( dA2, float, ldda*M ); } lapackf77_slarnv( &ione, iseed, &size, hA ); magma_smake_symmetric( M, hA, lda ); size = lda*N; lapackf77_slarnv( &ione, iseed, &size, hX ); lapackf77_slarnv( &ione, iseed, &size, hB ); lapackf77_slacpy( "Full", &M, &N, hB, &lda, hR, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_ssetmatrix_1D_col_bcyclic( M, M, hA, lda, dA, ldda, opts.ngpu, nb ); for( int d = 0; d < opts.ngpu; ++d ) { magma_setdevice( d ); //magmablasSetKernelStream( streams[ d ][ 0 ] ); magma_ssetmatrix( M, N, hX, lda, dX[d], ldda ); //if (d == 0) magma_ssetmatrix( M, N, hB, lda, dB[d], ldda ); // this is wrong coz when offset != 0 the gpu who do the beta*C may be not 0 so this should be related to stdev(starting device who own i=0 first col) magma_ssetmatrix( M, N, hB, lda, dB[d], ldda ); } //memset(hR, 0, lda*N*sizeof(float)); //trace_init( 1, opts.ngpu, nstream, (magma_queue_t*) streams ); //magma_int_t offset = 0; //nb; gpu_time = magma_sync_wtime(0); magmablas_ssymm_mgpu_com( MagmaLeft, MagmaLower, msize, N, calpha, dA, ldda, offset, dX, ldda, cbeta, dB, ldda, dwork, dworksiz, hR, lda, hwork, hworksiz, opts.ngpu, nb, streams, nstream, redevents2, nbevents, gnode, nbcmplx); gpu_time = magma_sync_wtime(0) - gpu_time; gpu_perf = gflops / gpu_time; #ifdef TRACING char buf[80]; snprintf( buf, sizeof(buf), "ssymm-m%d-n%d-nb%d-stream%d-ngpu%d-run%d.svg", (int) M, (int) N, (int) nb, (int) nstream, (int) opts.ngpu, (int) j ); trace_finalize( buf, "trace.css" ); #endif /* ==================================================================== Performs operation using CUBLAS =================================================================== */ if ( opts.check && iter == 0 ) { magma_setdevice( 0 ); magmablasSetKernelStream( 0 ); magma_ssetmatrix( M, M, hA, lda, dA2, ldda ); magma_ssetmatrix( M, N, hX, lda, dX[0], ldda ); magma_ssetmatrix( M, N, hB, lda, dwork[0], ldda ); gpu_time2 = magma_sync_wtime(0); magma_ssymm( MagmaLeft, MagmaLower, msize, N, calpha, dA2+offset*ldda+offset, ldda, dX[0], ldda, cbeta, dwork[0], ldda ); gpu_time2 = magma_sync_wtime(0) - gpu_time2; gpu_perf2 = gflops / gpu_time2; } /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.check ) { // store ||A||*||X|| errorbis = lapackf77_slange("fro", &msize, &msize, hA+offset*lda+offset, &lda, work ); errorbis *= lapackf77_slange("fro", &msize, &N, hX, &lda, work ); //printf( "A =" ); magma_sprint( M, M, hA, lda ); //printf( "X =" ); magma_sprint( M, N, hX, lda ); //printf( "B =" ); magma_sprint( M, N, hB, lda ); cpu_time = magma_wtime(); blasf77_ssymm( "Left", "Lower", &msize, &N, &calpha, hA+offset*lda+offset, &lda, hX, &lda, &cbeta, hB, &lda ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; /* trace_file = fopen("AJETE/C", "w"); for (int j = 0; j < N; j++) for (int i = 0; i < siz; i++) fprintf(trace_file, "%10d%10d%40.30e\n", i+1, j+1, hB[j*lda+i]); fclose(trace_file); */ magma_int_t firstprint=0; for(magma_int_t dev=0; dev < opts.ngpu; ++dev) { magma_setdevice( dev ); magma_sgetmatrix( M, N, dB[dev], ldda, hR, lda ); // compute relative error ||R||/||A||*||X||, where R := B_magma - B_lapack = R - B size = lda*N; blasf77_saxpy( &size, &c_neg_one, hB, &ione, hR, &ione ); error = lapackf77_slange("fro", &msize, &N, hR, &lda, work) / errorbis; //printf( "R =" ); magma_sprint( M, N, hR, lda ); if (firstprint == 0) { printf( "%5d %5d %5d %5d %7.1f (%7.4f) %7.1f (%7.4f) %7.1f (%7.4f) %8.2e %s\n", (int) M, (int) N, (int) nb, (int) offset, cpu_perf, cpu_time, gpu_perf, gpu_time, gpu_perf2, gpu_time2, error, (error < tol ? "ok" : "failed") ); } else { printf( "%89s %8.2e %s\n", " ", error, (error < tol ? "ok" : "failed") ); } status += ! (error < tol); firstprint =1; } } else { printf( "%5d %5d %5d %5d --- ( --- ) %7.1f (%7.4f) --- ( --- ) ---\n", (int) M, (int) N, (int) nb, (int) offset, gpu_perf, gpu_time ); } TESTING_FREE_CPU( hA ); TESTING_FREE_CPU( hX ); TESTING_FREE_CPU( hB ); TESTING_FREE_PIN( hR ); for( int d = 0; d < opts.ngpu; ++d ) { magma_setdevice( d ); TESTING_FREE_DEV( dA[d] ); TESTING_FREE_DEV( dX[d] ); TESTING_FREE_DEV( dB[d] ); TESTING_FREE_DEV( dwork[d] ); TESTING_FREE_PIN( hwork[d] ); } TESTING_FREE_PIN( hwork[opts.ngpu] ); if ( opts.check ) { magma_setdevice( 0 ); TESTING_FREE_DEV( dA2 ); } fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } // offset printf( "\n" ); } for( int d = 0; d < opts.ngpu; ++d ) { magma_setdevice( d ); for( magma_int_t i = 0; i < nstream; ++i ) { magma_queue_destroy( streams[d][i] ); } for( magma_int_t i = 0; i < nbevents; ++i ) { magma_event_destroy( redevents[d][i] ); magma_event_destroy( redevents2[d][i] ); } } TESTING_FINALIZE(); return status; }
int main(int ac, char **av) { int lc, i; /* loop counter */ char *msg; /* message returned from parse_opts */ /* parse standard options */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); setup(); for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; for (i = 0; i < TST_TOTAL; i++) { if (testcase[i].setupfunc && testcase[i].setupfunc() == -1) { tst_resm(TWARN, "Failed to setup test %d." " Skipping test", i); continue; } else { TEST(syscall(__NR_swapoff, testcase[i].path)); } if (testcase[i].cleanfunc && testcase[i].cleanfunc() == -1) { tst_brkm(TBROK, cleanup, "cleanup failed," " quitting the test"); } /* check return code */ if ((TEST_RETURN == -1) && (TEST_ERRNO == testcase[i]. exp_errno)) { tst_resm(TPASS, "swapoff(2) expected failure;" " Got errno - %s : %s", testcase[i].exp_errval, testcase[i].err_desc); } else { tst_resm(TFAIL, "swapoff(2) failed to produce" " expected error; %d, errno" ": %s and got %d", testcase[i].exp_errno, testcase[i].exp_errval, TEST_ERRNO); if ((TEST_RETURN == 0) && (i == 2)) { if (syscall(__NR_swapon, "./swapfile01", 0) != 0) { tst_brkm(TBROK, cleanup, " Failed to turn on" " swap file"); } } } TEST_ERROR_LOG(TEST_ERRNO); } /*End of TEST LOOPS */ } /*Clean up and exit */ cleanup(); tst_exit(); } /*End of main */
int main(int ac, char **av) { struct stat stat_buf; /* stat structure buffer */ int lc; const char *msg; char *file_name; /* ptr. for file name whose mode is modified */ char *test_desc; /* test specific error message */ int ind; /* counter to test different test conditions */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); /* * Invoke setup function to call individual test setup functions * to simulate test conditions. */ setup(); /* set the expected errnos... */ TEST_EXP_ENOS(exp_enos); for (lc = 0; TEST_LOOPING(lc); lc++) { tst_count = 0; for (ind = 0; Test_cases[ind].desc != NULL; ind++) { file_name = Test_cases[ind].pathname; test_desc = Test_cases[ind].desc; #if !defined(UCLINUX) if (file_name == High_address_node) { file_name = (char *)get_high_address(); } #endif /* * Call stat(2) to test different test conditions. * verify that it fails with -1 return value and * sets appropriate errno. */ TEST(stat(file_name, &stat_buf)); /* Check return code from stat(2) */ if (TEST_RETURN == -1) { TEST_ERROR_LOG(TEST_ERRNO); if (TEST_ERRNO == Test_cases[ind].exp_errno) { tst_resm(TPASS, "stat() fails, %s, errno:%d", test_desc, TEST_ERRNO); } else { tst_resm(TFAIL, "stat() fails, %s, errno:%d, expected errno:%d", test_desc, TEST_ERRNO, Test_cases[ind].exp_errno); } } else { tst_resm(TFAIL, "stat(2) returned %ld, expected -1, errno:%d", TEST_RETURN, Test_cases[ind].exp_errno); } } tst_count++; /* incr TEST_LOOP counter */ } /* * Invoke cleanup() to delete the test directory/file(s) created * in the setup(). */ cleanup(); tst_exit(); }
int main(int ac, char *av[]) { int *fildes; int ifile; char pfilname[40]; int min; int serrno; int lc; const char *msg; ifile = -1; if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); local_flag = PASSED; setup(); for (lc = 0; TEST_LOOPING(lc); lc++) { min = getdtablesize(); /* get number of files allowed open */ fildes = malloc((min + 10) * sizeof(int)); if (fildes == NULL) tst_brkm(TBROK | TERRNO, cleanup, "malloc error"); sprintf(pfilname, "./dup205.%d\n", getpid()); unlink(pfilname); serrno = 0; if ((fildes[0] = creat(pfilname, 0666)) == -1) tst_brkm(TBROK | TERRNO, cleanup, "creat failed"); else { fildes[fildes[0]] = fildes[0]; for (ifile = fildes[0] + 1; ifile < min + 10; ifile++) { if ((fildes[ifile] = dup2(fildes[ifile - 1], ifile)) == -1) { serrno = errno; break; } else { if (fildes[ifile] != ifile) { tst_brkm(TFAIL, cleanup, "got wrong descriptor " "number back (%d != %d)", fildes[ifile], ifile); } } } /* end for */ if (ifile < min) { tst_resm(TFAIL, "Not enough files duped"); local_flag = FAILED; } else if (ifile > min) { tst_resm(TFAIL, "Too many files duped"); local_flag = FAILED; } if (serrno != EBADF && serrno != EMFILE && serrno != EINVAL) { tst_resm(TFAIL, "bad errno on dup2 failure"); local_flag = FAILED; } } unlink(pfilname); for (ifile = fildes[0]; ifile < min + 10; ifile++) close(fildes[ifile]); if (local_flag == PASSED) { tst_resm(TPASS, "Test passed."); } else { tst_resm(TFAIL, "Test failed."); } } cleanup(); tst_exit(); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing ssygvdx */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time; float *h_A, *h_R, *h_work; #if defined(PRECISION_z) || defined(PRECISION_c) float *rwork; magma_int_t lrwork; #endif /* Matrix size */ float *w1, *w2; magma_int_t *iwork; magma_int_t N, n2, info, lwork, liwork; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1};; magma_int_t info_ortho = 0; magma_int_t info_solution = 0; magma_int_t info_reduction = 0; magma_timestr_t start, end; magma_opts opts; parse_opts( argc, argv, &opts ); magma_int_t ngpu = opts.ngpu; char jobz = opts.jobz; magma_int_t checkres = opts.check; char range = 'A'; char uplo = opts.uplo; magma_int_t itype = opts.itype; float f = opts.fraction; if (f != 1) range='I'; if ( checkres && jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); jobz = MagmaVec; } printf("using: itype = %d, jobz = %c, range = %c, uplo = %c, checkres = %d, fraction = %6.4f\n", (int) itype, jobz, range, uplo, (int) checkres, f); printf(" N M GPU Time(s) \n"); printf("==========================\n"); magma_int_t threads = magma_get_numthreads(); for( magma_int_t i = 0; i < opts.ntest; ++i ) { for( magma_int_t iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[i]; n2 = N*N; #if defined(PRECISION_z) || defined(PRECISION_c) lwork = magma_sbulge_get_lq2(N, threads) + 2*N + N*N; lrwork = 1 + 5*N +2*N*N; #else lwork = magma_sbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N; #endif liwork = 3 + 5*N; /* Allocate host memory for the matrix */ TESTING_MALLOC( h_A, float, n2); TESTING_MALLOC( w1, float , N); TESTING_MALLOC( w2, float , N); TESTING_HOSTALLOC(h_R, float, n2); TESTING_HOSTALLOC(h_work, float, lwork); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_HOSTALLOC( rwork, float, lrwork); #endif TESTING_MALLOC( iwork, magma_int_t, liwork); /* Initialize the matrix */ lapackf77_slarnv( &ione, ISEED, &n2, h_A ); /* Make diagonal real */ for(int i=0; i<N; i++) { MAGMA_S_SET2REAL( h_A[i*N+i], MAGMA_S_REAL(h_A[i*N+i]) ); } magma_int_t m1 = 0; float vl = 0; float vu = 0; magma_int_t il = 0; magma_int_t iu = 0; if (range == 'I'){ il = 1; iu = (int) (f*N); } if(opts.warmup){ // ================================================================== // Warmup using MAGMA // ================================================================== lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); if(ngpu==1){ printf("calling ssyevdx_2stage 1 GPU\n"); magma_ssyevdx_2stage(jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); }else{ printf("calling ssyevdx_2stage_m %d GPU\n", (int) ngpu); magma_ssyevdx_2stage_m(ngpu, jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } } // =================================================================== // Performs operation using MAGMA // =================================================================== lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); start = get_current_time(); if(ngpu==1){ printf("calling ssyevdx_2stage 1 GPU\n"); magma_ssyevdx_2stage(jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); }else{ printf("calling ssyevdx_2stage_m %d GPU\n", (int) ngpu); magma_ssyevdx_2stage_m(ngpu, jobz, range, uplo, N, h_R, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } end = get_current_time(); gpu_time = GetTimerValue(start,end)/1000.; if ( checkres ) { float eps = lapackf77_slamch("E"); printf("\n"); printf("------ TESTS FOR MAGMA SSYEVD ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", (int) N, (int) N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is %8.2e\n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the orthogonality, reduction and the eigen solutions */ if (jobz == MagmaVec) { info_ortho = check_orthogonality(N, N, h_R, N, eps); info_reduction = check_reduction(uplo, N, 1, h_A, w1, N, h_R, eps); } printf("------ CALLING LAPACK SSYEVD TO COMPUTE only eigenvalue and verify elementswise ------- \n"); lapackf77_ssyevd("N", "L", &N, h_A, &N, w2, h_work, &lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, &lrwork, #endif iwork, &liwork, &info); info_solution = check_solution(N, w2, w1, eps); if ( (info_solution == 0) & (info_ortho == 0) & (info_reduction == 0) ) { printf("***************************************************\n"); printf(" ---- TESTING SSYEVD ...................... PASSED !\n"); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING SSYEVD ... FAILED !\n"); printf("************************************************\n"); } } /* ===================================================================== Print execution time =================================================================== */ printf("%5d %5d %6.2f\n", (int) N, (int) m1, gpu_time); TESTING_FREE( h_A); TESTING_FREE( w1); TESTING_FREE( w2); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_HOSTFREE( rwork); #endif TESTING_FREE( iwork); TESTING_HOSTFREE(h_work); TESTING_HOSTFREE( h_R); } if ( opts.niter > 1 ) { printf( "\n" ); } } /* Shutdown */ TESTING_FINALIZE(); return 0; }
int main(int ac, char **av) { int lc; /* loop counter */ char *msg; /* message returned from parse_opts */ pid_t cpid; /* Child process id */ int status; /* child exit status */ /* Parse standard options given to run the test. */ msg = parse_opts(ac, av, (option_t *) NULL, NULL); if (msg != (char *)NULL) { tst_brkm(TBROK, tst_exit, "OPTION PARSING ERROR - %s", msg); } #ifdef UCLINUX maybe_run_child(&do_child, "dddd", &timereq.tv_sec, &timereq.tv_nsec, &timerem.tv_sec, &timerem.tv_nsec); #endif /* Perform global setup for test */ setup(); /* Check looping state if -i option given */ for (lc = 0; TEST_LOOPING(lc); lc++) { /* Reset Tst_count in case we are looping. */ Tst_count = 0; /* * Creat a child process and suspend its * execution using nanosleep() */ if ((cpid = FORK_OR_VFORK()) == -1) { tst_brkm(TBROK, cleanup, "fork() failed to create child process"); } if (cpid == 0) { /* Child process */ #ifdef UCLINUX if (self_exec(av[0], "dddd", timereq.tv_sec, timereq.tv_nsec, timerem.tv_sec, timerem.tv_nsec) < 0) { tst_brkm(TBROK, cleanup, "self_exec failed"); } #else do_child(); #endif } /* wait for child to time slot for execution */ sleep(1); /* Now send signal to child */ if (kill(cpid, SIGINT) < 0) { tst_brkm(TBROK, cleanup, "kill() fails send signal to child"); } /* Wait for child to execute */ wait(&status); if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { tst_resm(TPASS, "Functionality of nanosleep() is " "correct"); } else { tst_resm(TFAIL, "child process exited abnormally; " "status = %d", status); } } /* End for TEST_LOOPING */ /* Call cleanup() to undo setup done for the test. */ cleanup(); /*NOTREACHED*/ return 0; } /* End main */
int main(int ac, char **av) { int lc; /* loop counter */ char *msg; /* message returned from parse_opts */ struct flock flock; /* parse standard options */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); } setup(); /* check for looping state if -i option is given */ for (lc = 0; TEST_LOOPING(lc); lc++) { /* reset Tst_count in case we are looping */ Tst_count = 0; /* //block1: */ tst_resm(TINFO, "Enter block 1"); tst_resm(TINFO, "Test for errno EINVAL"); fail = 0; if (fcntl(1, F_BADCMD, 1) != -1) { tst_resm(TFAIL, "fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EINVAL) { tst_resm(TFAIL, "Expected EINVAL got %d", errno); fail = 1; } if (fail) { tst_resm(TINFO, "block 1 FAILED"); } else { tst_resm(TINFO, "block 1 PASSED"); } tst_resm(TINFO, "Exit block 1"); /* //block2: */ #ifndef UCLINUX /* Skip since uClinux does not implement memory protection */ tst_resm(TINFO, "Enter block 2"); tst_resm(TINFO, "Test for errno EFAULT"); fail = 0; /* case 1: F_SETLK */ if (fcntl(1, F_SETLK, (void *)-1) != -1) { tst_resm(TFAIL, "F_SETLK: fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EFAULT) { tst_resm(TFAIL, "F_SETLK: Expected EFAULT got %d", errno); fail = 1; } /* case 2: F_SETLKW */ if (fcntl(1, F_SETLKW, (void *)-1) != -1) { tst_resm(TFAIL, "F_SETLKW: fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EFAULT) { tst_resm(TFAIL, "F_SETLKW: Expected EFAULT got %d", errno); fail = 1; } /* case 3: F_GETLK */ if (fcntl(1, F_GETLK, (void *)-1) != -1) { tst_resm(TFAIL, "F_GETLK: fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EFAULT) { tst_resm(TFAIL, "F_GETLK: Expected EFAULT got %d", errno); fail = 1; } if (fail) { tst_resm(TINFO, "blcok 2 FAILED"); } else { tst_resm(TINFO, "block 2 PASSED"); } tst_resm(TINFO, "Exit block 2"); #else tst_resm(TINFO, "Skip block 2 on uClinux"); #endif /* //block3: */ tst_resm(TINFO, "Enter block 3"); tst_resm(TINFO, "Test for errno EINVAL"); fail = 0; flock.l_whence = -1; flock.l_type = F_WRLCK; flock.l_start = 0L; flock.l_len = 0L; if (fcntl(1, F_SETLK, &flock) != -1) { tst_resm(TFAIL, "fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EINVAL) { tst_resm(TFAIL, "Expected EINVAL, got %d", errno); fail = 1; } if (fail) { tst_resm(TINFO, "block 3 FAILED"); } else { tst_resm(TINFO, "block 3 PASSED"); } tst_resm(TINFO, "Exit block 3"); /* //block4: */ tst_resm(TINFO, "Enter block 4"); tst_resm(TINFO, "Test for errno EBADF"); fail = 0; if (fcntl(-1, F_GETLK, &flock) != -1) { tst_resm(TFAIL, "fcntl(2) failed to FAIL"); fail = 1; } else if (errno != EBADF) { tst_resm(TFAIL, "Expected EBADF, got %d", errno); fail = 1; } if (fail) { tst_resm(TINFO, "block 4 FAILED"); } else { tst_resm(TINFO, "block 4 PASSED"); } tst_resm(TINFO, "Exit block 4"); } cleanup(); tst_exit(); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zgemm */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, magma_perf, magma_time, dev_perf, dev_time, cpu_perf, cpu_time; double magma_error, dev_error, Cnorm, work[1]; magma_int_t M, N, K; magma_int_t Am, An, Bm, Bn; magma_int_t sizeA, sizeB, sizeC; magma_int_t lda, ldb, ldc, ldda, lddb, lddc; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magmaDoubleComplex *h_A, *h_B, *h_C, *h_Cmagma, *h_Cdev; magmaDoubleComplex_ptr d_A, d_B, d_C; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; magmaDoubleComplex alpha = MAGMA_Z_MAKE( 0.29, -0.86 ); magmaDoubleComplex beta = MAGMA_Z_MAKE( -0.48, 0.38 ); magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); #ifdef HAVE_CUBLAS // for CUDA, we can check MAGMA vs. CUBLAS, without running LAPACK printf("If running lapack (option --lapack), MAGMA and %s error are both computed\n" "relative to CPU BLAS result. Else, MAGMA error is computed relative to %s result.\n\n", g_platform_str, g_platform_str ); printf("transA = %s, transB = %s\n", lapack_trans_const(opts.transA), lapack_trans_const(opts.transB) ); printf(" M N K MAGMA Gflop/s (ms) %s Gflop/s (ms) CPU Gflop/s (ms) MAGMA error %s error\n", g_platform_str, g_platform_str ); #else // for others, we need LAPACK for check opts.lapack |= opts.check; // check (-c) implies lapack (-l) printf("transA = %s, transB = %s\n", lapack_trans_const(opts.transA), lapack_trans_const(opts.transB) ); printf(" M N K %s Gflop/s (ms) CPU Gflop/s (ms) %s error\n", g_platform_str, g_platform_str ); #endif printf("=========================================================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; K = opts.ksize[itest]; gflops = FLOPS_ZGEMM( M, N, K ) / 1e9; if ( opts.transA == MagmaNoTrans ) { lda = Am = M; An = K; } else { lda = Am = K; An = M; } if ( opts.transB == MagmaNoTrans ) { ldb = Bm = K; Bn = N; } else { ldb = Bm = N; Bn = K; } ldc = M; ldda = ((lda+31)/32)*32; lddb = ((ldb+31)/32)*32; lddc = ((ldc+31)/32)*32; sizeA = lda*An; sizeB = ldb*Bn; sizeC = ldc*N; TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*An ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, ldb*Bn ); TESTING_MALLOC_CPU( h_C, magmaDoubleComplex, ldc*N ); TESTING_MALLOC_CPU( h_Cmagma, magmaDoubleComplex, ldc*N ); TESTING_MALLOC_CPU( h_Cdev, magmaDoubleComplex, ldc*N ); TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*An ); TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, lddb*Bn ); TESTING_MALLOC_DEV( d_C, magmaDoubleComplex, lddc*N ); /* Initialize the matrices */ lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B ); lapackf77_zlarnv( &ione, ISEED, &sizeC, h_C ); magma_zsetmatrix( Am, An, h_A, lda, d_A, ldda ); magma_zsetmatrix( Bm, Bn, h_B, ldb, d_B, lddb ); /* ===================================================================== Performs operation using MAGMABLAS (currently only with CUDA) =================================================================== */ #ifdef HAVE_CUBLAS magma_zsetmatrix( M, N, h_C, ldc, d_C, lddc ); magma_time = magma_sync_wtime( NULL ); magmablas_zgemm( opts.transA, opts.transB, M, N, K, alpha, d_A, ldda, d_B, lddb, beta, d_C, lddc ); magma_time = magma_sync_wtime( NULL ) - magma_time; magma_perf = gflops / magma_time; magma_zgetmatrix( M, N, d_C, lddc, h_Cmagma, ldc ); #endif /* ===================================================================== Performs operation using CUBLAS / clBLAS / Xeon Phi MKL =================================================================== */ magma_zsetmatrix( M, N, h_C, ldc, d_C, lddc ); #ifdef HAVE_CUBLAS dev_time = magma_sync_wtime( NULL ); cublasZgemm( opts.handle, cublas_trans_const(opts.transA), cublas_trans_const(opts.transB), M, N, K, &alpha, d_A, ldda, d_B, lddb, &beta, d_C, lddc ); dev_time = magma_sync_wtime( NULL ) - dev_time; #else dev_time = magma_sync_wtime( opts.queue ); magma_zgemm( opts.transA, opts.transB, M, N, K, alpha, d_A, 0, ldda, d_B, 0, lddb, beta, d_C, 0, lddc, opts.queue ); dev_time = magma_sync_wtime( opts.queue ) - dev_time; #endif dev_perf = gflops / dev_time; magma_zgetmatrix( M, N, d_C, lddc, h_Cdev, ldc ); /* ===================================================================== Performs operation using CPU BLAS =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); blasf77_zgemm( lapack_trans_const(opts.transA), lapack_trans_const(opts.transB), &M, &N, &K, &alpha, h_A, &lda, h_B, &ldb, &beta, h_C, &ldc ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; } /* ===================================================================== Check the result =================================================================== */ if ( opts.lapack ) { // compute relative error for both magma & dev, relative to lapack, // |C_magma - C_lapack| / |C_lapack| Cnorm = lapackf77_zlange( "F", &M, &N, h_C, &ldc, work ); blasf77_zaxpy( &sizeC, &c_neg_one, h_C, &ione, h_Cdev, &ione ); dev_error = lapackf77_zlange( "F", &M, &N, h_Cdev, &ldc, work ) / Cnorm; #ifdef HAVE_CUBLAS blasf77_zaxpy( &sizeC, &c_neg_one, h_C, &ione, h_Cmagma, &ione ); magma_error = lapackf77_zlange( "F", &M, &N, h_Cmagma, &ldc, work ) / Cnorm; printf("%5d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %8.2e %s\n", (int) M, (int) N, (int) K, magma_perf, 1000.*magma_time, dev_perf, 1000.*dev_time, cpu_perf, 1000.*cpu_time, magma_error, dev_error, (magma_error < tol && dev_error < tol ? "ok" : "failed")); status += ! (magma_error < tol && dev_error < tol); #else printf("%5d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e %s\n", (int) M, (int) N, (int) K, dev_perf, 1000.*dev_time, cpu_perf, 1000.*cpu_time, dev_error, (dev_error < tol ? "ok" : "failed")); status += ! (dev_error < tol); #endif } else { #ifdef HAVE_CUBLAS // compute relative error for magma, relative to dev (currently only with CUDA) Cnorm = lapackf77_zlange( "F", &M, &N, h_Cdev, &ldc, work ); blasf77_zaxpy( &sizeC, &c_neg_one, h_Cdev, &ione, h_Cmagma, &ione ); magma_error = lapackf77_zlange( "F", &M, &N, h_Cmagma, &ldc, work ) / Cnorm; printf("%5d %5d %5d %7.2f (%7.2f) %7.2f (%7.2f) --- ( --- ) %8.2e --- %s\n", (int) M, (int) N, (int) K, magma_perf, 1000.*magma_time, dev_perf, 1000.*dev_time, magma_error, (magma_error < tol ? "ok" : "failed")); status += ! (magma_error < tol); #else printf("%5d %5d %5d %7.2f (%7.2f) --- ( --- ) ---\n", (int) M, (int) N, (int) K, dev_perf, 1000.*dev_time ); #endif } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( h_C ); TESTING_FREE_CPU( h_Cmagma ); TESTING_FREE_CPU( h_Cdev ); TESTING_FREE_DEV( d_A ); TESTING_FREE_DEV( d_B ); TESTING_FREE_DEV( d_C ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
int main(int ac, char **av) { int lc; const char *msg; int rval; pid_t pid, pid1; int status; struct passwd *ltpuser1, *ltpuser2; /* * parse standard options */ if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); } /* * perform global setup for test */ setup(); /* set the expected errnos... */ TEST_EXP_ENOS(exp_enos); /* * check looping state if -i option given */ for (lc = 0; TEST_LOOPING(lc); lc++) { tst_count = 0; /* Initialize the test directories name */ sprintf(tstdir1, "tstdir1.%d", getpid()); ltpuser1 = my_getpwnam(user1name); if ((pid = FORK_OR_VFORK()) < 0) { tst_brkm(TBROK, cleanup, "fork #1 failed"); } if (pid == 0) { /* first child */ /* set to ltpuser1 */ rval = setreuid(ltpuser1->pw_uid, ltpuser1->pw_uid); if (rval < 0) { tst_resm(TFAIL, "setreuid failed to " "to set the real uid to %d and " "effective uid to %d", ltpuser1->pw_uid, ltpuser1->pw_uid); perror("setreuid"); exit(1); } /* create the parent directory with 0700 permits */ if (mkdir(tstdir1, PERMS) == -1) { tst_resm(TFAIL, "mkdir(%s, %#o) Failed", tstdir1, PERMS); exit(1); } /* create tstdir1 succeeded */ exit(0); } wait(&status); if (WEXITSTATUS(status) != 0) { tst_brkm(TFAIL, cleanup, "Test to check mkdir EACCES failed" "in create parent directory"); } sprintf(tstdir2, "%s/tst", tstdir1); ltpuser2 = my_getpwnam(user2name); if ((pid1 = FORK_OR_VFORK()) < 0) { tst_brkm(TBROK, cleanup, "fork #2 failed"); } if (pid1 == 0) { /* second child */ /* set to ltpuser2 */ rval = setreuid(ltpuser2->pw_uid, ltpuser2->pw_uid); if (rval < 0) { tst_resm(TFAIL, "setreuid failed to " "to set the real uid to %d and " "effective uid to %d", ltpuser2->pw_uid, ltpuser2->pw_uid); perror("setreuid"); exit(1); } if (mkdir(tstdir2, PERMS) != -1) { tst_resm(TFAIL, "mkdir(%s, %#o) unexpected " "succeeded", tstdir2, PERMS); exit(1); } if (errno != EACCES) { tst_resm(TFAIL, "Expected EACCES got %d", errno); exit(1); } /* PASS */ exit(0); } waitpid(pid1, &status, 0); if (WEXITSTATUS(status) == 0) { tst_resm(TPASS, "Test to attempt to creat a directory " "in a directory having no permissions " "SUCCEEDED in setting errno to EACCES"); } else { tst_resm(TFAIL, "Test to attempt to creat a directory " "in a directory having no permissions FAILED"); cleanup(); } } /* * cleanup and exit */ cleanup(); tst_exit(); }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zhetrd_he2hb */ int main( int argc, char** argv) { TESTING_INIT_MGPU(); real_Double_t gpu_time, gpu_perf, gflops; magmaDoubleComplex *h_A, *h_R, *h_work, *dT1; magmaDoubleComplex *tau; double *D, *E; /* Matrix size */ magma_int_t N, n2, lda, lwork, ldt, lwork0; magma_int_t info; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; #if defined(CHECKEIG) #if defined(PRECISION_z) || defined(PRECISION_d) magma_int_t WANTZ=0; magma_int_t THREADS=1; #endif #endif magma_int_t NE = 0; magma_int_t NB = 0; magma_int_t ngpu = 1; magma_opts opts; parse_opts( argc, argv, &opts ); NB = opts.nb; if (NB < 1) NB = 64; //64; //magma_get_zhetrd_he2hb_nb(N); // what is NE ? if (NE < 1) NE = 64; //N; //magma_get_zhetrd_he2hb_nb(N); // N not yet initialized printf(" N GPU GFlop/s \n"); printf("=====================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; lda = N; ldt = N; n2 = N*lda; gflops = FLOPS_ZHETRD( N ) / 1e9; /* We suppose the magma NB is bigger than lapack NB */ lwork0 = N*NB; /* Allocate host memory for the matrix */ TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, lda*N ); TESTING_MALLOC_CPU( tau, magmaDoubleComplex, N-1 ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, lda*N ); TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork0 ); TESTING_MALLOC_PIN( D, double, N ); TESTING_MALLOC_PIN( E, double, N ); //TESTING_MALLOC_DEV( dT1, magmaDoubleComplex, (2*min(N,N)+(N+31)/32*32)*NB ); TESTING_MALLOC_DEV( dT1, magmaDoubleComplex, (N*NB) ); // if (WANTZ) gflops = 2.0*gflops; /* ==================================================================== Initialize the matrix =================================================================== */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); magma_zmake_hermitian( N, h_A, lda ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda ); /* ==================================================================== Performs operation using MAGMA =================================================================== */ magma_device_t cdev; magma_getdevice( &cdev ); gpu_time = magma_wtime(); /* magma_zhetrd_he2hb( opts.uplo, N, NB, h_R, lda, tau, h_work, lwork0, dT1, THREADS, &info); tband = magma_wtime - gpu_time(); printf(" Finish BAND N %d NB %d ngpu %d timing= %f\n", N, NB, ngpu, tband); magma_zhetrd_bhe2trc_v5(THREADS, WANTZ, opts.uplo, NE, N, NB, h_R, lda, D, E, dT1, ldt); */ /* magma_zhetrd_he2hb( opts.uplo, N, NB, h_R, lda, tau, h_work, lwork, dT1, THREADS, &info); tband = magma_wtime - gpu_time(); printf(" Finish BAND N %d NB %d ngpu %d timing= %f\n", N, NB, ngpu, tband); magma_zhetrd_bhe2trc(THREADS, WANTZ, opts.uplo, NE, N, NB, h_R, lda, D, E, dT1, ldt); */ magma_range_t range = MagmaRangeAll; magma_int_t fraction_ev = 100; magma_int_t il, iu, m1; double vl=0., vu=0.; if (fraction_ev == 0) { il = N / 10; iu = N / 5+il; } else { il = 1; iu = (int)(fraction_ev*N); if (iu < 1) iu = 1; } magmaDoubleComplex *hh_work; magma_int_t *iwork; magma_int_t nb, /*lwork,*/ liwork; magma_int_t threads = magma_get_parallel_numthreads(); #if defined(PRECISION_z) || defined(PRECISION_c) double *rwork; magma_int_t lrwork; lwork = magma_zbulge_get_lq2(N, threads) + 2*N + N*N; lrwork = 1 + 5*N +2*N*N; TESTING_MALLOC_PIN( rwork, double, lrwork ); #else lwork = magma_zbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N; #endif liwork = 3 + 5*N; nb = magma_get_zhetrd_nb(N); TESTING_MALLOC_PIN( hh_work, magmaDoubleComplex, lwork ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); if (ngpu == 1) { printf("calling zheevdx_2stage 1 GPU\n"); magma_zheevdx_2stage( opts.jobz, range, opts.uplo, N, h_R, lda, vl, vu, il, iu, &m1, D, hh_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } else { printf("calling zheevdx_2stage_m %d GPU\n", (int) ngpu); magma_zheevdx_2stage_m(ngpu, opts.jobz, range, opts.uplo, N, h_R, lda, vl, vu, il, iu, &m1, D, hh_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info); } magma_setdevice( cdev ); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; /* ===================================================================== Check the factorization =================================================================== */ /* if ( opts.check ) { FILE *fp ; printf("Writing input matrix in matlab_i_mat.txt ...\n"); fp = fopen ("matlab_i_mat.txt", "w") ; if ( fp == NULL ) { printf("Couldn't open output file\n"); exit(1); } for (j=0; j < N; j++) { for (k=0; k < N; k++) { #if defined(PRECISION_z) || defined(PRECISION_c) fprintf(fp, "%5d %5d %11.8f %11.8f\n", k+1, j+1, h_A[k+j*lda].x, h_A[k+j*lda].y); #else fprintf(fp, "%5d %5d %11.8f\n", k+1, j+1, h_A[k+j*lda]); #endif } } fclose( fp ) ; printf("Writing output matrix in matlab_o_mat.txt ...\n"); fp = fopen ("matlab_o_mat.txt", "w") ; if ( fp == NULL ) { printf("Couldn't open output file\n"); exit(1); } for (j=0; j < N; j++) { for (k=0; k < N; k++) { #if defined(PRECISION_z) || defined(PRECISION_c) fprintf(fp, "%5d %5d %11.8f %11.8f\n", k+1, j+1, h_R[k+j*lda].x, h_R[k+j*lda].y); #else fprintf(fp, "%5d %5d %11.8f\n", k+1, j+1, h_R[k+j*lda]); #endif } } fclose( fp ) ; } */ /* ===================================================================== Print performance and error. =================================================================== */ #if defined(CHECKEIG) #if defined(PRECISION_z) || defined(PRECISION_d) if ( opts.check ) { printf(" Total N %5d gflops %6.2f timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time ); char JOBZ; if (WANTZ == 0) JOBZ = 'N'; else JOBZ = 'V'; double nrmI=0.0, nrm1=0.0, nrm2=0.0; int lwork2 = 256*N; magmaDoubleComplex *work2, *AINIT; double *rwork2, *D2; // TODO free this memory ! magma_zmalloc_cpu( &work2, lwork2 ); magma_dmalloc_cpu( &rwork2, N ); magma_dmalloc_cpu( &D2, N ); magma_zmalloc_cpu( &AINIT, N*lda ); memcpy(AINIT, h_A, N*lda*sizeof(magmaDoubleComplex)); /* compute the eigenvalues using lapack routine to be able to compare to it and used as ref */ cpu_time = magma_wtime(); i= min(12, THREADS); #if defined(USEMKL) mkl_set_num_threads( i ); #endif #if defined(USEACML) omp_set_num_threads(i); #endif lapackf77_zheev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2, #if defined(PRECISION_z) || defined (PRECISION_c) rwork2, #endif &info ); ///* call eigensolver for our resulting tridiag [D E] and for Q */ //dstedc_withZ('V', N, D, E, h_R, lda); ////dsterf_( &N, D, E, &info); //// cpu_time = magma_wtime() - cpu_time; printf(" Finish CHECK - EIGEN timing= %f threads %d\n", cpu_time, i); /* for (i=0; i < 10; i++) printf(" voici lpk D[%d] %8.2e\n", i, D2[i]); */ //magmaDoubleComplex mydz=0.0, mydo=1.0; //magmaDoubleComplex *Z; // magma_zmalloc_cpu( &Z, N*lda ); // dgemm_("N", "N", &N, &N, &N, &mydo, h_R, &lda, h_A, &lda, &mydz, Z, &lda); /* compare result */ cmp_vals(N, D2, D, &nrmI, &nrm1, &nrm2); magmaDoubleComplex *WORKAJETER; double *RWORKAJETER, *RESU; // TODO free this memory ! magma_zmalloc_cpu( &WORKAJETER, (2* N * N + N) ); magma_dmalloc_cpu( &RWORKAJETER, N ); magma_dmalloc_cpu( &RESU, 10 ); int MATYPE; memset(RESU, 0, 10*sizeof(double)); MATYPE=3; double NOTHING=0.0; cpu_time = magma_wtime(); // check results zcheck_eig_(&JOBZ, &MATYPE, &N, &NB, AINIT, &lda, &NOTHING, &NOTHING, D2, D, h_R, &lda, WORKAJETER, RWORKAJETER, RESU ); cpu_time = magma_wtime() - cpu_time; printf(" Finish CHECK - results timing= %f\n", cpu_time); #if defined(USEMKL) mkl_set_num_threads( 1 ); #endif #if defined(USEACML) omp_set_num_threads(1); #endif printf("\n"); printf(" ================================================================================================================\n"); printf(" ==> INFO voici threads=%d N=%d NB=%d WANTZ=%d\n", (int) THREADS, (int) N, (int) NB, (int) WANTZ); printf(" ================================================================================================================\n"); printf(" DSBTRD : %15s \n", "STATblgv9withQ "); printf(" ================================================================================================================\n"); if (WANTZ > 0) printf(" | A - U S U' | / ( |A| n ulp ) : %15.3E \n", RESU[0]); if (WANTZ > 0) printf(" | I - U U' | / ( n ulp ) : %15.3E \n", RESU[1]); printf(" | D1 - EVEIGS | / (|D| ulp) : %15.3E \n", RESU[2]); printf(" max | D1 - EVEIGS | : %15.3E \n", RESU[6]); printf(" ================================================================================================================\n\n\n"); printf(" ****************************************************************************************************************\n"); printf(" * Hello here are the norm Infinite (max)=%8.2e norm one (sum)=%8.2e norm2(sqrt)=%8.2e *\n", nrmI, nrm1, nrm2); printf(" ****************************************************************************************************************\n\n"); } #endif #endif printf(" Total N %5d gflops %6.2f timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time ); printf("============================================================================\n\n\n"); /* Memory clean up */ TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( tau ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_work ); TESTING_FREE_PIN( D ); TESTING_FREE_PIN( E ); TESTING_FREE_DEV( dT1 ); /* TODO - not all memory has been freed inside loop */ fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE_MGPU(); return EXIT_SUCCESS; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing strmm */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, cublas_perf, cublas_time, cpu_perf, cpu_time; float cublas_error, Cnorm, work[1]; magma_int_t M, N; magma_int_t Ak; magma_int_t sizeA, sizeB; magma_int_t lda, ldb, ldda, lddb; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; float *h_A, *h_B, *h_Bcublas; float *d_A, *d_B; float c_neg_one = MAGMA_S_NEG_ONE; float alpha = MAGMA_S_MAKE( 0.29, -0.86 ); magma_opts opts; parse_opts( argc, argv, &opts ); printf("If running lapack (option --lapack), MAGMA and CUBLAS error are both computed\n" "relative to CPU BLAS result. Else, MAGMA error is computed relative to CUBLAS result.\n\n" "side = %c, uplo = %c, transA = %c, diag = %c \n", opts.side, opts.uplo, opts.transA, opts.diag ); printf(" M N CUBLAS Gflop/s (ms) CPU Gflop/s (ms) CUBLAS error\n"); printf("==================================================================\n"); for( int i = 0; i < opts.ntest; ++i ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[i]; N = opts.nsize[i]; gflops = FLOPS_STRMM(opts.side, M, N) / 1e9; if ( opts.side == MagmaLeft ) { lda = M; Ak = M; } else { lda = N; Ak = N; } ldb = M; ldda = ((lda+31)/32)*32; lddb = ((ldb+31)/32)*32; sizeA = lda*Ak; sizeB = ldb*N; TESTING_MALLOC( h_A, float, lda*Ak ); TESTING_MALLOC( h_B, float, ldb*N ); TESTING_MALLOC( h_Bcublas, float, ldb*N ); TESTING_DEVALLOC( d_A, float, ldda*Ak ); TESTING_DEVALLOC( d_B, float, lddb*N ); /* Initialize the matrices */ lapackf77_slarnv( &ione, ISEED, &sizeA, h_A ); lapackf77_slarnv( &ione, ISEED, &sizeB, h_B ); /* ===================================================================== Performs operation using CUDA-BLAS =================================================================== */ magma_ssetmatrix( Ak, Ak, h_A, lda, d_A, ldda ); magma_ssetmatrix( M, N, h_B, ldb, d_B, lddb ); cublas_time = magma_sync_wtime( NULL ); cublasStrmm( opts.side, opts.uplo, opts.transA, opts.diag, M, N, alpha, d_A, ldda, d_B, lddb ); cublas_time = magma_sync_wtime( NULL ) - cublas_time; cublas_perf = gflops / cublas_time; magma_sgetmatrix( M, N, d_B, lddb, h_Bcublas, ldb ); /* ===================================================================== Performs operation using CPU BLAS =================================================================== */ if ( opts.lapack ) { cpu_time = magma_wtime(); blasf77_strmm( &opts.side, &opts.uplo, &opts.transA, &opts.diag, &M, &N, &alpha, h_A, &lda, h_B, &ldb ); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; } /* ===================================================================== Check the result =================================================================== */ if ( opts.lapack ) { // compute relative error for both magma & cublas, relative to lapack, // |C_magma - C_lapack| / |C_lapack| Cnorm = lapackf77_slange( "M", &M, &N, h_B, &ldb, work ); blasf77_saxpy( &sizeB, &c_neg_one, h_B, &ione, h_Bcublas, &ione ); cublas_error = lapackf77_slange( "M", &M, &N, h_Bcublas, &ldb, work ) / Cnorm; printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f) %8.2e\n", (int) M, (int) N, cublas_perf, 1000.*cublas_time, cpu_perf, 1000.*cpu_time, cublas_error ); } else { printf("%5d %5d %7.2f (%7.2f) --- ( --- ) --- ---\n", (int) M, (int) N, cublas_perf, 1000.*cublas_time); } TESTING_FREE( h_A ); TESTING_FREE( h_B ); TESTING_FREE( h_Bcublas ); TESTING_DEVFREE( d_A ); TESTING_DEVFREE( d_B ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return 0; }
int main(int ac, char **av) { int lc; char *ptr; /* message returned from parse_opts */ gid_t group; int i; int entries; /* number of group entries */ initgroups("root", 0); if ((ptr = parse_opts(ac, av, NULL, NULL)) != NULL) tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", ptr); setup(); for (lc = 0; TEST_LOOPING(lc); lc++) { Tst_count = 0; TEST(getgroups(-1, gidset)); if (TEST_RETURN == 0) tst_resm(TFAIL, "getgroups succeeded unexpectedly"); else if (STD_FUNCTIONAL_TEST) { if (errno == EINVAL) tst_resm(TPASS, "getgroups failed as expected with EINVAL"); else tst_resm(TFAIL | TTERRNO, "getgroups didn't fail as expected with EINVAL"); } /* * Check that if ngrps is zero that the number of groups is * return and the the gidset array is not modified. * This is a POSIX special case. */ memset(gidset, 052, NGROUPS); memset(cmpset, 052, NGROUPS); TEST(getgroups(0, gidset)); if (TEST_RETURN == -1) tst_resm(TFAIL | TTERRNO, "getgroups failed"); else if (STD_FUNCTIONAL_TEST) { if (memcmp(cmpset, gidset, NGROUPS) != 0) tst_resm(TFAIL, "getgroups modified the gidset array"); else tst_resm(TPASS, "getgroups did not modify the gidset " "array"); } /* * Check to see that is -1 is returned and errno is set to * EINVAL when ngroups is not big enough to hold all groups. */ if (TEST_RETURN <= 1) tst_resm(TCONF, "getgroups returned %ld; unable to test that using ngrps >=1 but less than number of grps", TEST_RETURN); else { TEST(getgroups(TEST_RETURN - 1, gidset)); if (TEST_RETURN == -1) { if (STD_FUNCTIONAL_TEST) { if (errno == EINVAL) tst_resm(TPASS, "getgroups failed as " "expected with EINVAL"); else tst_resm(TFAIL | TERRNO, "getgroups didn't fail " "with EINVAL"); } } else tst_resm(TFAIL, "getgroups succeeded unexpectedly with %ld", TEST_RETURN); } TEST(getgroups(NGROUPS, gidset)); if ((entries = TEST_RETURN) == -1) tst_resm(TFAIL | TTERRNO, "getgroups failed unexpectedly"); else if (STD_FUNCTIONAL_TEST) { group = getgid(); for (i = 0; i < entries; i++) if (gidset[i] == group) { tst_resm(TPASS, "getgroups(NGROUPS,gidset) " "returned %d contains gid %d " "(from getgid)", entries, group); break; } if (i == entries) tst_resm(TFAIL, "getgroups(NGROUPS,gidset) ret %d, does " "not contain gid %d (from getgid)", entries, group); } } cleanup(); tst_exit(); }
int main(int ac, char **av) { int lc; /* loop counter */ char *msg; /* message returned from parse_opts */ pid_t cpid; /* process id of the child process */ int exit_status; /* exit status of child process */ sigset_t PendSig; /* variable to hold pending signal */ /* Parse standard options given to run the test. */ msg = parse_opts(ac, av, (option_t *) NULL, NULL); if (msg != (char *) NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); tst_exit(); } /* Perform global setup for test */ setup(); /* set the expected errnos... */ TEST_EXP_ENOS(exp_enos); /* Check looping state if -i option given */ for (lc = 0; TEST_LOOPING(lc); lc++) { /* Reset Tst_count in case we are looping. */ Tst_count=0; /* * Call vfork(2) to create a child process without * fully copying the address space of parent. */ TEST(vfork()); /* check return code of vfork() */ if ((cpid = TEST_RETURN) == -1) { TEST_ERROR_LOG(TEST_ERRNO); tst_resm(TFAIL, "vfork() Failed, errno=%d : %s", TEST_ERRNO, strerror(TEST_ERRNO)); } else if (cpid == 0) { /* Child process */ /* * Perform functional verification if test * executed without (-f) option. */ if (STD_FUNCTIONAL_TEST) { /* * Check whether the pending signal SIGUSR1 * in the parent is also pending in the child * process by storing it in a variable. */ if (sigpending(&PendSig) == -1) { tst_resm(TFAIL, "sigpending function " "failed in child"); _exit(1); } /* Check if SIGUSR1 is pending in child */ if (sigismember(&PendSig, SIGUSR1) != 0) { tst_resm(TFAIL, "SIGUSR1 also pending " "in child process"); _exit(1); } /* * Exit with normal exit code if everything * fine */ _exit(0); } } else { /* parent process */ /* * Let the parent process wait till child completes * its execution. */ wait(&exit_status); /* Check for the exit status of child process */ if (WEXITSTATUS(exit_status) == 0) { tst_resm(TPASS, "Call to vfork() " "successful"); } else if (WEXITSTATUS(exit_status) == 1) { tst_resm(TFAIL, \ "Child process exited abnormally"); } } Tst_count++; /* incr. TEST_LOOP counter */ } /* End for TEST_LOOPING */ /* Call cleanup() to undo setup done for the test. */ cleanup(); return 0; } /* End main */
/*--------------------------------------------------------------------*/ int main(int argc, char **argv) { /***** BEGINNING OF MAIN. *****/ int pid, npid; int nsig, exno, nexno, status; int ret_val = 0; int core; void chsig(); #ifdef UCLINUX const char *msg; if ((msg = parse_opts(argc, argv, NULL, NULL)) != NULL) { tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg); } maybe_run_child(&do_child, "dd", &temp, &sig); #endif setup(); //tempdir(); /* move to new directory */ 12/20/2003 blenter(); exno = 1; if (sigset(SIGCLD, chsig) == SIG_ERR) { fprintf(temp, "\tsigset failed, errno = %d\n", errno); fail_exit(); } for (sig = 1; sig < 14; sig++) { fflush(temp); chflag = 0; pid = FORK_OR_VFORK(); if (pid < 0) { forkfail(); } if (pid == 0) { #ifdef UCLINUX if (self_exec(argv[0], "dd", temp, sig) < 0) { tst_resm(TBROK, "self_exec FAILED - " "terminating test."); tst_exit(); } #else do_child(); #endif } else { //fprintf(temp, "Testing signal %d\n", sig); while (!chflag) /* wait for child */ sleep(1); kill(pid, sig); /* child should ignroe this sig */ kill(pid, SIGCLD); /* child should exit */ #ifdef BCS while ((npid = wait(&status)) != pid || (npid == -1 && errno == EINTR)) ; if (npid != pid) { fprintf(temp, "wait error: wait returned wrong pid\n"); ret_val = 1; } #else while ((npid = waitpid(pid, &status, 0)) != -1 || errno == EINTR) ; #endif /* nsig = status & 0177; core = status & 0200; nexno = (status & 0xff00) >> 8; */ /***** LTP Port *****/ nsig = WTERMSIG(status); #ifdef WCOREDUMP core = WCOREDUMP(status); #endif nexno = WIFEXITED(status); /***** ** ** *****/ /* nsig is the signal number returned by wait it should be 0, except when sig = 9 */ if ((sig == 9) && (nsig != sig)) { fprintf(temp, "wait error: unexpected signal" " returned when the signal sent was 9" " The status of the process is %d \n", status); ret_val = 1; } if ((sig != 9) && (nsig != 0)) { fprintf(temp, "wait error: unexpected signal " "returned, the status of the process is " "%d \n", status); ret_val = 1; } /* nexno is the exit number returned by wait it should be 1, except when sig = 9 */ if (sig == 9) if (nexno != 0) { fprintf(temp, "signal error: unexpected" " exit number returned when" " signal sent was 9, the status" " of the process is %d \n", status); ret_val = 1; } else; else if (nexno != 1) { fprintf(temp, "signal error: unexpected exit " "number returned,the status of the" " process is %d\n", status); ret_val = 1; } } } if (ret_val) local_flag = FAILED; /*--------------------------------------------------------------------*/ anyfail(); tst_exit(); } /******** END OF MAIN. ********/
/** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance * @addr: IP address string * @args: Mount options string */ static int rdma_create_trans(struct p9_client *client, const char *addr, char *args) { int err; struct p9_rdma_opts opts; struct p9_trans_rdma *rdma; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; /* Parse the transport specific mount options */ err = parse_opts(args, &opts); if (err < 0) return err; /* Create and initialize the RDMA transport structure */ rdma = alloc_rdma(&opts); if (!rdma) return -ENOMEM; /* Create the RDMA CM ID */ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); if (IS_ERR(rdma->cm_id)) goto error; /* Associate the client with the transport */ client->trans = rdma; /* Resolve the server's address */ rdma->addr.sin_family = AF_INET; rdma->addr.sin_addr.s_addr = in_aton(addr); rdma->addr.sin_port = htons(opts.port); err = rdma_resolve_addr(rdma->cm_id, NULL, (struct sockaddr *)&rdma->addr, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) goto error; /* Resolve the route to the server */ err = rdma_resolve_route(rdma->cm_id, rdma->timeout); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) goto error; /* Query the device attributes */ err = ib_query_device(rdma->cm_id->device, &devattr); if (err) goto error; /* Create the Completion Queue */ rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, cq_event_handler, client, opts.sq_depth + opts.rq_depth + 1, 0); if (IS_ERR(rdma->cq)) goto error; ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); /* Create the Protection Domain */ rdma->pd = ib_alloc_pd(rdma->cm_id->device); if (IS_ERR(rdma->pd)) goto error; /* Cache the DMA lkey in the transport */ rdma->dma_mr = NULL; if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) rdma->lkey = rdma->cm_id->device->local_dma_lkey; else { rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(rdma->dma_mr)) goto error; rdma->lkey = rdma->dma_mr->lkey; } /* Create the Queue Pair */ memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = client; qp_attr.cap.max_send_wr = opts.sq_depth; qp_attr.cap.max_recv_wr = opts.rq_depth; qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; qp_attr.send_cq = rdma->cq; qp_attr.recv_cq = rdma->cq; err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); if (err) goto error; rdma->qp = rdma->cm_id->qp; /* Request a connection */ memset(&conn_param, 0, sizeof(conn_param)); conn_param.private_data = NULL; conn_param.private_data_len = 0; conn_param.responder_resources = P9_RDMA_IRD; conn_param.initiator_depth = P9_RDMA_ORD; err = rdma_connect(rdma->cm_id, &conn_param); if (err) goto error; err = wait_for_completion_interruptible(&rdma->cm_done); if (err || (rdma->state != P9_RDMA_CONNECTED)) goto error; client->status = Connected; return 0; error: rdma_destroy_trans(rdma); return -ENOTCONN; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing zhegvdx */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gpu_time /*cpu_time*/; magmaDoubleComplex *h_A, *h_R, *h_B, *h_S, *h_work; double *w1, *w2, vl=0, vu=0; double result[2] = {0}; magma_int_t *iwork; magma_int_t N, n2, info, il, iu, m1, m2, nb, lwork, liwork; magmaDoubleComplex c_zero = MAGMA_Z_ZERO; magmaDoubleComplex c_one = MAGMA_Z_ONE; magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE; #if defined(PRECISION_z) || defined(PRECISION_c) double *rwork; magma_int_t lrwork; #endif //double d_one = 1.; //double d_ten = 10.; magma_int_t ione = 1; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); double tolulp = opts.tolerance * lapackf77_dlamch("P"); if ( opts.check && opts.jobz == MagmaNoVec ) { fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" ); opts.jobz = MagmaVec; } printf("using: itype = %d, jobz = %s, uplo = %s, check = %d, fraction = %6.4f\n", (int) opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo), (int) opts.check, opts.fraction); printf(" N M GPU Time (sec)\n"); printf("============================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { N = opts.nsize[itest]; n2 = N*N; nb = magma_get_zhetrd_nb(N); #if defined(PRECISION_z) || defined(PRECISION_c) lwork = 2*N*nb + N*N; lrwork = 1 + 5*N +2*N*N; #else lwork = 1 + 6*N*nb + 2* N*N; #endif liwork = 3 + 5*N; if ( opts.fraction == 0 ) { il = N / 10; iu = N / 5+il; } else { il = 1; iu = (int) (opts.fraction*N); if (iu < 1) iu = 1; } TESTING_MALLOC_CPU( h_A, magmaDoubleComplex, n2 ); TESTING_MALLOC_CPU( h_B, magmaDoubleComplex, n2 ); TESTING_MALLOC_CPU( w1, double, N ); TESTING_MALLOC_CPU( w2, double, N ); TESTING_MALLOC_CPU( iwork, magma_int_t, liwork ); TESTING_MALLOC_PIN( h_R, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_S, magmaDoubleComplex, n2 ); TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_MALLOC_PIN( rwork, double, lrwork); #endif /* Initialize the matrix */ lapackf77_zlarnv( &ione, ISEED, &n2, h_A ); lapackf77_zlarnv( &ione, ISEED, &n2, h_B ); magma_zmake_hpd( N, h_B, N ); magma_zmake_hermitian( N, h_A, N ); // ================================================================== // Warmup using MAGMA // ================================================================== if(opts.warmup){ lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_zhegvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo, N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info ); if (info != 0) printf("magma_zhegvdx returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); gpu_time = magma_wtime(); magma_zhegvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo, N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info ); gpu_time = magma_wtime() - gpu_time; if (info != 0) printf("magma_zhegvdx returned error %d: %s.\n", (int) info, magma_strerror( info )); if ( opts.check ) { /* ===================================================================== Check the results following the LAPACK's [zc]hegvdx routine. A x = lambda B x is solved and the following 3 tests computed: (1) | A Z - B Z D | / ( |A||Z| N ) (itype = 1) | A B Z - Z D | / ( |A||Z| N ) (itype = 2) | B A Z - Z D | / ( |A||Z| N ) (itype = 3) (2) | S(with V) - S(w/o V) | / | S | =================================================================== */ #if defined(PRECISION_d) || defined(PRECISION_s) double *rwork = h_work + N*N; #endif double temp1, temp2; result[0] = 1.; result[0] /= lapackf77_zlanhe("1", lapack_uplo_const(opts.uplo), &N, h_A, &N, rwork); result[0] /= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork); if (opts.itype == 1) { blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i < m1; ++i) blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N); result[0] *= lapackf77_zlange("1", &N, &m1, h_work, &N, rwork)/N; } else if (opts.itype == 2) { blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i < m1; ++i) blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N; } else if (opts.itype == 3) { blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N); for(int i=0; i < m1; ++i) blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione); blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N); result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N; } lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N ); lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N ); magma_zhegvdx( opts.itype, MagmaNoVec, MagmaRangeI, opts.uplo, N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2, h_work, lwork, #if defined(PRECISION_z) || defined(PRECISION_c) rwork, lrwork, #endif iwork, liwork, &info ); if (info != 0) printf("magma_zhegvdx returned error %d: %s.\n", (int) info, magma_strerror( info )); temp1 = temp2 = 0; for(int j=0; j < m2; j++) { temp1 = max(temp1, absv(w1[j])); temp1 = max(temp1, absv(w2[j])); temp2 = max(temp2, absv(w1[j]-w2[j])); } result[1] = temp2 / (((double)m2)*temp1); } /* ===================================================================== Print execution time =================================================================== */ printf("%5d %5d %7.2f\n", (int) N, (int) m1, gpu_time); if ( opts.check ) { printf("Testing the eigenvalues and eigenvectors for correctness:\n"); if (opts.itype == 1) { printf("(1) | A Z - B Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed")); } else if (opts.itype == 2) { printf("(1) | A B Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed")); } else if (opts.itype == 3) { printf("(1) | B A Z - Z D | / (|A| |Z| N) = %8.2e %s\n", result[0], (result[0] < tol ? "ok" : "failed")); } printf( "(2) | D(w/ Z) - D(w/o Z) | / |D| = %8.2e %s\n\n", result[1], (result[1] < tolulp ? "ok" : "failed")); status += ! (result[0] < tol && result[1] < tolulp); } TESTING_FREE_CPU( h_A ); TESTING_FREE_CPU( h_B ); TESTING_FREE_CPU( w1 ); TESTING_FREE_CPU( w2 ); TESTING_FREE_CPU( iwork ); TESTING_FREE_PIN( h_R ); TESTING_FREE_PIN( h_S ); TESTING_FREE_PIN( h_work ); #if defined(PRECISION_z) || defined(PRECISION_c) TESTING_FREE_PIN( rwork ); #endif fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }