Ejemplo n.º 1
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing csymmetrize
   Code is very similar to testing_ctranspose.cpp
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t    gbytes, gpu_perf, gpu_time, cpu_perf, cpu_time;
    float           error, work[1];
    magmaFloatComplex  c_neg_one = MAGMA_C_NEG_ONE;
    magmaFloatComplex *h_A, *h_R;
    magmaFloatComplex *d_A;
    magma_int_t N, size, lda, ldda;
    magma_int_t ione     = 1;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    printf("    N   CPU GByte/s (sec)   GPU GByte/s (sec)   check\n");
    printf("=====================================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            lda    = N;
            ldda   = ((N+31)/32)*32;
            size   = lda*N;
            // load strictly lower triangle, save strictly upper triangle
            gbytes = sizeof(magmaFloatComplex) * 1.*N*(N-1) / 1e9;
    
            TESTING_MALLOC(   h_A, magmaFloatComplex, size   );
            TESTING_MALLOC(   h_R, magmaFloatComplex, size   );
            TESTING_DEVALLOC( d_A, magmaFloatComplex, ldda*N );
            
            /* Initialize the matrix */
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < N; ++i ) {
                    h_A[i + j*lda] = MAGMA_C_MAKE( i + j/10000., j );
                }
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_csetmatrix( N, N, h_A, lda, d_A, ldda );
            
            gpu_time = magma_sync_wtime( 0 );
            //magmablas_csymmetrize( opts.uplo, N-2, d_A+1+ldda, ldda );  // inset by 1 row & col
            magmablas_csymmetrize( opts.uplo, N, d_A, ldda );
            gpu_time = magma_sync_wtime( 0 ) - gpu_time;
            gpu_perf = gbytes / gpu_time;
            
            /* =====================================================================
               Performs operation using naive in-place algorithm
               (LAPACK doesn't implement symmetrize)
               =================================================================== */
            cpu_time = magma_wtime();
            //for( int j = 1; j < N-1; ++j ) {    // inset by 1 row & col
            //    for( int i = 1; i < j; ++i ) {
            for( int j = 0; j < N; ++j ) {
                for( int i = 0; i < j; ++i ) {
                    if ( opts.uplo == MagmaLower ) {
                        h_A[i + j*lda] = MAGMA_C_CNJG( h_A[j + i*lda] );
                    }
                    else {
                        h_A[j + i*lda] = MAGMA_C_CNJG( h_A[i + j*lda] );
                    }
                }
            }
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gbytes / cpu_time;
            
            /* =====================================================================
               Check the result
               =================================================================== */
            magma_cgetmatrix( N, N, d_A, ldda, h_R, lda );
            
            blasf77_caxpy(&size, &c_neg_one, h_A, &ione, h_R, &ione);
            error = lapackf77_clange("f", &N, &N, h_R, &lda, work);

            printf("%5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %s\n",
                   (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time,
                   (error == 0. ? "ok" : "failed") );
            
            TESTING_FREE( h_A );
            TESTING_FREE( h_R );
            TESTING_DEVFREE( d_A );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return 0;
}
Ejemplo n.º 2
0
int main(int argc, char *argv[])
{
	size_t memsize = 0;	/* at first in MB, limited to 4Gb on 32 bits */
	int pagesize;

	int i;
	int lc;
	char *msg;
	char *p, *bigmalloc;
	int loop_count;		/* limited to 16Go on 32 bits systems */

	pagesize = sysconf(_SC_PAGESIZE);

	if ((msg = parse_opts(argc, argv, options, help)) != (char *)NULL)
		tst_brkm(TBROK, cleanup, "OPTION PARSING ERROR - %s", msg);

	if (m_opt) {
		memsize = (size_t) atoi(m_copt) * 1024 * 1024;

		if (memsize < 1) {
			tst_brkm(TBROK, cleanup, "Invalid arg for -m: %s",
				 m_copt);
		}
	}

	if (r_opt)
		srand(time(NULL));

	setup();

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		Tst_count = 0;

		if (!m_opt) {
			/* find out by ourselves! */
			memsize = get_memsize();
			if (memsize < 1) {
				tst_brkm(TBROK, cleanup,
					 "Unable to guess maxmemsize from /proc/meminfo");
			}
		}

		/* Allocate (virtual) memory */
		bigmalloc = p = malloc(memsize);

		if (!p) {
			tst_resm(TFAIL, "malloc - alloc of %zuMB failed",
				 memsize / 1024 / 1024);
			cleanup();
		}

		/*
		 * Dirty all the pages, to force physical RAM allocation
		 * and exercise eventually the swapper
		 */
		tst_resm(TINFO, "touching %zuMB of malloc'ed memory (%s)",
			 memsize / 1024 / 1024, r_opt ? "random" : "linear");

		loop_count = memsize / pagesize;

		for (i = 0; i < loop_count; i++) {
			if (v_opt
			    && (i % (PROGRESS_LEAP * 1024 / pagesize) == 0)) {
				printf(".");
				fflush(stdout);
			}
			/*
			 * Make the page dirty,
			 * and make sure compiler won't optimize it away
			 * Touching more than one word per page is useless
			 * because of cache.
			 */
			*(int *)p = 0xdeadbeef ^ i;

			if (r_opt) {
				p = bigmalloc +
				    (size_t) ((double)(memsize - sizeof(int)) *
					      rand() / (RAND_MAX + 1.0));
			} else {
				p += pagesize;
			}
		}

		if (v_opt)
			printf("\n");

		/* This is not mandatory (except in a loop), but it exercise mm again */
		free(bigmalloc);

		/*
		 * seems that if the malloc'ed area was bad, we'd get SEGV (or kicked
		 * somehow by the OOM killer?), hence we can indicate a PASS.
		 */
		tst_resm(TPASS, "malloc - alloc of %zuMB succeeded",
			 memsize / 1024 / 1024);

	}

	cleanup();

	return 0;
}
Ejemplo n.º 3
0
int main(int ac, char **av)
{
	int lc;
	char *msg;
	void check_functionality(void);
	int r_val;

	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	setup();		/* global setup */

	for (lc = 0; TEST_LOOPING(lc); lc++) {
		Tst_count = 0;

		/* get the name of the test dirctory */
		if ((temp_dir = (getcwd(temp_dir, 0))) == NULL)
			tst_brkm(TBROK, cleanup, "getcwd failed");

		/*
		 * create a new directory and open it
		 */

		if ((r_val = mkdir(TEST_DIR, MODES)) == -1)
			tst_brkm(TBROK, cleanup, "mkdir failed");

		if ((fd = open(TEST_DIR, O_RDONLY)) == -1)
			tst_brkm(TBROK, cleanup, "open of directory failed");

		TEST(fchdir(fd));

		if (TEST_RETURN == -1)
			tst_brkm(TFAIL | TTERRNO, cleanup,
				 "fchdir call failed");
		else {
			if (STD_FUNCTIONAL_TEST)
				check_functionality();
			else
				tst_resm(TPASS, "call succeeded");
		}

		/*
		 * clean up things in case we are looping
		 */

		/*
		 * NOTE: in case of failure here, we need to use "tst_resm()"
		 * and not "tst_brkm()".  This is because if we get to this
		 * point, we have already set a PASS or FAIL for the test
		 * and "tst_brkm()" won't report as we might expect.
		 */

		/* chdir back to our temporary work directory */
		if ((r_val = chdir("..")) == -1)
			tst_resm(TBROK | TERRNO, "chdir failed");

		if ((r_val = rmdir(TEST_DIR)) == -1)
			tst_resm(TBROK | TERRNO, "rmdir failed");

		free(temp_dir);
		temp_dir = NULL;
	}

	cleanup();

	tst_exit();
}
Ejemplo n.º 4
0
Archivo: lseek01.c Proyecto: GOEUM/ltp
int main(int ac, char **av)
{
	int lc;
	char *msg;

	int ind;
	int offset;

    /***************************************************************
     * parse standard options
     ***************************************************************/
	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	}

    /***************************************************************
     * perform global setup for test
     ***************************************************************/
	setup();

	/* set the expected errnos... */
	TEST_EXP_ENOS(exp_enos);

    /***************************************************************
     * check looping state if -c option given
     ***************************************************************/
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		tst_count = 0;

		offset = (lc % 100) * 4096;	/* max size is 100 blocks */

		for (ind = 0; Whence[ind] >= 0; ind++) {

			/*
			 *  Call lseek(2)
			 */
			TEST(lseek(Fd, (long)offset, Whence[ind]));

			/* check return code */
			if (TEST_RETURN == -1) {
				TEST_ERROR_LOG(TEST_ERRNO);
				tst_resm(TFAIL,
					 "lseek(%s, %d, 0) Failed, errno=%d : %s",
					 Fname, offset, TEST_ERRNO,
					 strerror(TEST_ERRNO));
			} else {

		/***************************************************************
	         * only perform functional verification if flag set (-f not given)
	         ***************************************************************/
				if (STD_FUNCTIONAL_TEST) {
					/* No Verification test, yet... */
					tst_resm(TPASS,
						 "lseek(%s, %d, %d) returned %ld",
						 Fname, offset, Whence[ind],
						 TEST_RETURN);
				} else
					tst_count++;
			}
		}

	}

    /***************************************************************
     * cleanup and exit
     ***************************************************************/
	cleanup();
	tst_exit();

}
Ejemplo n.º 5
0
// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;
    
    magmaDoubleComplex  *A, *B;
    double diff, error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld;
    magmaDoubleComplex x2_m, x2_c;  // complex x for magma, cblas/fortran blas respectively
    double x_m, x_c;  // x for magma, cblas/fortran blas respectively
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    opts.tolerance = max( 100., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    gTol = tol;
    
    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n"
            "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA.  !!\n"
            "\n"
            "Diff  compares MAGMA wrapper        to CBLAS and BLAS function; should be exactly 0.\n"
            "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n"
            "\n" );
    
    double total_diff  = 0.;
    double total_error = 0.;
    int inc[] = { 1 };  //{ -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    int ninc = sizeof(inc)/sizeof(*inc);
    
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];
        
    for( int iincx = 0; iincx < ninc; ++iincx ) {
        magma_int_t incx = inc[iincx];
        
    for( int iincy = 0; iincy < ninc; ++iincy ) {
        magma_int_t incy = inc[iincy];
        
        printf("=========================================================================\n");
        printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n",
                (int) m, (int) n, (int) k, (int) incx, (int) incy );
        printf( "Function              MAGMA     CBLAS     BLAS        Diff      Error\n"
                "                      msec      msec      msec\n" );
        
        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) );
        ld = max( 1, maxn );
        size = ld*maxn;
        magma_zmalloc_pinned( &A,  size );  assert( A   != NULL );
        magma_zmalloc_pinned( &B,  size );  assert( B   != NULL );
        
        // initialize matrices
        lapackf77_zlarnv( &ione, ISEED, &size, A );
        lapackf77_zlarnv( &ione, ISEED, &size, B );
        
        printf( "Level 1 BLAS ----------------------------------------------------------\n" );
        
        
        // ----- test DZASUM
        // get one-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dzasum( m, A(0,j), incx );
                
                x_c = cblas_dzasum( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dzasum( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dzasum", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test DZNRM2
        // get two-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dznrm2( m, A(0,j), incx );
                
                x_c = cblas_dznrm2( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dznrm2( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dznrm2", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test ZDOTC
        // dot columns, Aj^H Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "zdotc", diff, error );
        total_diff  += diff;
        total_error += error;
        total_error += error;
        
        // ----- test ZDOTU
        // dot columns, Aj^T * Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "zdotu", diff, error );
        total_diff  += diff;
        total_error += error;
        
        // tell user about disabled functions
        #if defined( MAGMA_WITH_MKL )
            printf( "cblas_zdotc and cblas_zdotu disabled with MKL (segfaults)\n" );
        #endif
        
        #if defined( __APPLE__ )
            printf( "blasf77_zdotc and blasf77_zdotu disabled on MacOS (segfaults)\n" );
        #endif
            
        // cleanup
        magma_free_pinned( A );
        magma_free_pinned( B );
        fflush( stdout );
    }}}  // itest, incx, incy
    
    // TODO use average error?
    printf( "sum diffs  = %8.2g, MAGMA wrapper        compared to CBLAS and Fortran BLAS; should be exactly 0.\n"
            "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n",
            total_diff, total_error );
    if ( total_diff != 0. ) {
        printf( "some tests failed diff == 0.; see above.\n" );
    }
    else {
        printf( "all tests passed diff == 0.\n" );
    }
    
    TESTING_FINALIZE();
    
    int status = (total_diff != 0.);
    return status;
}
Ejemplo n.º 6
0
static void report_to_kerneloops(
                const char *dump_dir_name,
                map_string_t *settings)
{
    problem_data_t *problem_data = create_problem_data_for_reporting(dump_dir_name);
    if (!problem_data)
        xfunc_die(); /* create_problem_data_for_reporting already emitted error msg */

    const char *backtrace = problem_data_get_content_or_NULL(problem_data, FILENAME_BACKTRACE);
    if (!backtrace)
        error_msg_and_die("Error sending kernel oops due to missing backtrace");

    const char *env = getenv("KerneloopsReporter_SubmitURL");
    const char *submitURL = (env ? env : get_map_string_item_or_empty(settings, "SubmitURL"));
    if (!submitURL[0])
        submitURL = "http://oops.kernel.org/submitoops.php";

    log(_("Submitting oops report to %s"), submitURL);

    CURLcode ret = http_post_to_kerneloops_site(submitURL, backtrace);
    if (ret != CURLE_OK)
        error_msg_and_die("Kernel oops has not been sent due to %s", curl_easy_strerror(ret));

    problem_data_free(problem_data);

    /* Server replies with:
     * 200 thank you for submitting the kernel oops information
     * RemoteIP: 34192fd15e34bf60fac6a5f01bba04ddbd3f0558
     * - no URL or bug ID apparently...
     */
    struct dump_dir *dd = dd_opendir(dump_dir_name, /*flags:*/ 0);
    if (dd)
    {
        report_result_t rr = { .label = (char *)"kerneloops" };
        rr.url = (char *)submitURL;
        add_reported_to_entry(dd, &rr);
        dd_close(dd);
    }

    log("Kernel oops report was uploaded");
}

int main(int argc, char **argv)
{
    abrt_init(argv);

    /* I18n */
    setlocale(LC_ALL, "");
#if ENABLE_NLS
    bindtextdomain(PACKAGE, LOCALEDIR);
    textdomain(PACKAGE);
#endif

    map_string_t *settings = new_map_string();
    const char *dump_dir_name = ".";
    GList *conf_file = NULL;

    /* Can't keep these strings/structs static: _() doesn't support that */
    const char *program_usage_string = _(
        "& [-v] [-c CONFFILE]... -d DIR\n"
        "\n"
        "Reports kernel oops to kerneloops.org (or similar) site.\n"
        "\n"
        "Files with names listed in $EXCLUDE_FROM_REPORT are not included\n"
        "into the tarball.\n"
        "\n"
        "CONFFILE lines should have 'PARAM = VALUE' format.\n"
        "Recognized string parameter: SubmitURL.\n"
        "Parameter can be overridden via $KerneloopsReporter_SubmitURL."
    );
    enum {
        OPT_v = 1 << 0,
        OPT_d = 1 << 1,
        OPT_c = 1 << 2,
    };
    /* Keep enum above and order of options below in sync! */
    struct options program_options[] = {
        OPT__VERBOSE(&g_verbose),
        OPT_STRING('d', NULL, &dump_dir_name, "DIR" , _("Problem directory")),
        OPT_LIST(  'c', NULL, &conf_file    , "FILE", _("Configuration file")),
        OPT_END()
    };
    /*unsigned opts =*/ parse_opts(argc, argv, program_options, program_usage_string);

    export_abrt_envvars(0);

    while (conf_file)
    {
        char *fn = (char *)conf_file->data;
        log_notice("Loading settings from '%s'", fn);
        load_conf_file(fn, settings, /*skip key w/o values:*/ false);
        log_debug("Loaded '%s'", fn);
        conf_file = g_list_remove(conf_file, fn);
    }

    report_to_kerneloops(dump_dir_name, settings);

    free_map_string(settings);
    return 0;
}
Ejemplo n.º 7
0
/*ARGSUSED*/
int
main(int argc, char **argv)
{
	register int i, j, ok, pid;
	int count, child, status, nwait;

#ifdef UCLINUX
	char *msg;
	if ((msg = parse_opts(argc, argv, (option_t *)NULL, NULL)) != (char *)NULL){
		tst_brkm(TBROK, cleanup, "OPTION PARSING ERROR - %s", msg);
	}

	argv0 = argv[0];
	maybe_run_child(&do_child, "dS", &id_uclinux, &maxsemstring);
#endif

	prog = argv[0];
	nwait = 0;
	setup();		
/*--------------------------------------------------------------*/
	srand(getpid());

	tid = -1;

	for (i = 0; i < NPROCS; i++) {
		do {
			keyarray[i] = (key_t)rand();
			if (keyarray[i] == IPC_PRIVATE) {
				ok = 0;
				continue;
			}
			ok = 1;
			for (j = 0; j < i; j++) {
				if (keyarray[j] == keyarray[i]) {
					ok = 0;
					break;
				}
			}
		} while (ok == 0);
	}

	if ((signal(SIGTERM, term)) == SIG_ERR) {
                tst_resm(TFAIL, "\tsignal failed. errno = %d", errno);
		tst_exit();
	}

	for (i = 0; i <  NPROCS; i++) {
		if ((pid = FORK_OR_VFORK()) < 0) {
                        tst_resm(TFAIL, "\tFork failed (may be OK if under stress)");
                        tst_exit();
		}
		if (pid == 0) {
			procstat = 1;
			dotest(keyarray[i]);
			exit(0);
		}
		pidarray[i] = pid;
		nwait++;
	}

	/*
	 * Wait for children to finish.
	 */

	count = 0;
	while((child = wait(&status)) > 0) {
		if (status) {
	                tst_resm(TFAIL, "%s[%d] Test failed.  exit=0x%x", prog, child, status);
			local_flag = FAILED;
		}
		++count;
	}

	/*
	 * Should have collected all children.
	 */

	if (count != nwait) {
                tst_resm(TFAIL, "\tWrong # children waited on, count = %d", count);
		local_flag = FAILED;
	}

	if (local_flag != FAILED)
		tst_resm(TPASS, "semctl06 ran successfully!");
	else tst_resm(TFAIL, "semctl06 failed");
	
/*--------------------------------------------------------------*/
/* Clean up any files created by test before call to anyfail.	*/

	cleanup ();

	return (0); /* shut lint up */
}
Ejemplo n.º 8
0
int main(int ac, char **av)
{
	int lc;			/* loop counter */
	char *msg;		/* message returned from parse_opts */
	int incr;		/* increment */
	long nbrkpt;		/* new brk point value */
	long cur_brk_val;	/* current size returned by sbrk */
	long aft_brk_val;	/* current size returned by sbrk */

	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	setup();

	/*
	 * Attempt to control how fast we get to test max size.
	 * Every MAX_SIZE_LC'th lc will be fastest test will reach max size.
	 */
	incr = (Max_brk_byte_size - Beg_brk_val) / (MAX_SIZE_LC / 2);

	if ((incr * 2) < 4096)	/* make sure that process will grow */
		incr += 4096 / 2;

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		Tst_count = 0;

		/*
		 * Determine new value to give brk
		 * Every even lc value, grow by 2 incr and
		 * every odd lc value, strink by one incr.
		 * If lc is equal to 3, no change, special case.
		 */
		cur_brk_val = (long)sbrk(0);
		if (lc == 3) {
			nbrkpt = cur_brk_val;	/* no change, special one time case */
		} else if ((lc % 2) == 0) {
			/*
			 * grow
			 */
			nbrkpt = cur_brk_val + (2 * incr);

			if (nbrkpt > Max_brk_byte_size)
				nbrkpt = Beg_brk_val;	/* start over */

		} else {
			/*
			 * shrink
			 */
			nbrkpt = cur_brk_val - incr;
		}

/****
    printf("cur_brk_val = %d, nbrkpt = %d, incr = %d, lc = %d\n",
	cur_brk_val, nbrkpt, incr, lc);
****/

		/*
		 * Call brk(2)
		 */
		TEST(brk((char *)nbrkpt));

		/* check return code */
		if (TEST_RETURN == -1) {

			aft_brk_val = (long)sbrk(0);
			tst_resm(TFAIL|TTERRNO,
				 "brk(%ld) failed (size before %ld, after %ld)",
				 nbrkpt, cur_brk_val, aft_brk_val);

		} else {

			if (STD_FUNCTIONAL_TEST) {

				aft_brk_val = (long)sbrk(0);
				if (aft_brk_val == nbrkpt) {

					tst_resm(TPASS,
						 "brk(%ld) returned %ld, new size verified by sbrk",
						 nbrkpt, TEST_RETURN);
				} else {
					tst_resm(TFAIL,
						 "brk(%ld) returned %ld, sbrk before %ld, after %ld",
						 nbrkpt, TEST_RETURN,
						 cur_brk_val, aft_brk_val);
				}
			}
		}

	}

	cleanup();

	tst_exit();
}
Ejemplo n.º 9
0
int main(int ac, char **av)
{
	int lc;
	char *msg;
	int results;

	/* Disable test if the version of the kernel is less than 2.6.17 */
	if (((results = tst_kvercmp(2, 6, 17)) < 0)) {
		tst_resm(TWARN, "This test can only run on kernels that are ");
		tst_resm(TWARN, "2.6.17 and higher");
		exit(0);
	}

	/*
	 * parse standard options
	 */
	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	/*
	 * perform global setup for test
	 */
	setup();

	/*
	 * check if the current filesystem is nfs
	 */
	if (tst_is_cwd_nfs()) {
		tst_brkm(TCONF, cleanup,
			 "Cannot do tee on a file located on an NFS filesystem");
	}

	/*
	 * check looping state if -c option given
	 */
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		Tst_count = 0;

		/*
		 * Call tee_test
		 */
		TEST(tee_test());

		/* check return code */
		if (TEST_RETURN < 0) {
			if (TEST_RETURN != -1) {
				TEST_ERRNO = -TEST_RETURN;
			}
			TEST_ERROR_LOG(TEST_ERRNO);
			tst_resm(TFAIL, "tee() Failed, errno=%d : %s",
				 TEST_ERRNO, strerror(TEST_ERRNO));
		} else {

			/*
			 * only perform functional verification if flag set (-f not given)
			 */
			if (STD_FUNCTIONAL_TEST) {
				/* No Verification test, yet... */
				tst_resm(TPASS, "tee() returned %ld",
					 TEST_RETURN);
			}
		}

	}

	/*
	 * cleanup and exit
	 */
	cleanup();

	return (0);
}
Ejemplo n.º 10
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zgesv
*/
int main(int argc, char **argv)
{
    TESTING_INIT();

    real_Double_t   gflops, cpu_perf, cpu_time, gpu_perf, gpu_time;
    double          error, Rnorm, Anorm, Xnorm, *work;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *h_A, *h_LU, *h_B, *h_X;
    magma_int_t *ipiv;
    magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    nrhs = opts.nrhs;
    
    printf("ngpu %d\n", (int) opts.ngpu );
    printf("    N  NRHS   CPU Gflop/s (sec)   GPU GFlop/s (sec)   ||B - AX|| / N*||A||*||X||\n");
    printf("================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            ldb    = lda;
            gflops = ( FLOPS_ZGETRF( N, N ) + FLOPS_ZGETRS( N, nrhs ) ) / 1e9;
            
            TESTING_MALLOC_CPU( h_A,  magmaDoubleComplex, lda*N    );
            TESTING_MALLOC_CPU( h_LU, magmaDoubleComplex, lda*N    );
            TESTING_MALLOC_CPU( h_B,  magmaDoubleComplex, ldb*nrhs );
            TESTING_MALLOC_CPU( h_X,  magmaDoubleComplex, ldb*nrhs );
            TESTING_MALLOC_CPU( work, double,             N        );
            TESTING_MALLOC_CPU( ipiv, magma_int_t,        N        );
            
            /* Initialize the matrices */
            sizeA = lda*N;
            sizeB = ldb*nrhs;
            lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
            lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
            
            // copy A to LU and B to X; save A and B for residual
            lapackf77_zlacpy( "F", &N, &N,    h_A, &lda, h_LU, &lda );
            lapackf77_zlacpy( "F", &N, &nrhs, h_B, &ldb, h_X,  &ldb );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_zgesv( N, nrhs, h_LU, lda, ipiv, h_X, ldb, opts.queues2, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_zgesv returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            //=====================================================================
            // Residual
            //=====================================================================
            Anorm = lapackf77_zlange("I", &N, &N,    h_A, &lda, work);
            Xnorm = lapackf77_zlange("I", &N, &nrhs, h_X, &ldb, work);
            
            blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N,
                           &c_one,     h_A, &lda,
                                       h_X, &ldb,
                           &c_neg_one, h_B, &ldb);
            
            Rnorm = lapackf77_zlange("I", &N, &nrhs, h_B, &ldb, work);
            error = Rnorm/(N*Anorm*Xnorm);
            status += ! (error < tol);
            
            /* ====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_zgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_zgesv returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                printf( "%5d %5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                        (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time,
                        error, (error < tol ? "ok" : "failed"));
            }
            else {
                printf( "%5d %5d     ---   (  ---  )   %7.2f (%7.2f)   %8.2e   %s\n",
                        (int) N, (int) nrhs, gpu_perf, gpu_time,
                        error, (error < tol ? "ok" : "failed"));
            }
            
            TESTING_FREE_CPU( h_A  );
            TESTING_FREE_CPU( h_LU );
            TESTING_FREE_CPU( h_B  );
            TESTING_FREE_CPU( h_X  );
            TESTING_FREE_CPU( work );
            TESTING_FREE_CPU( ipiv );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
Ejemplo n.º 11
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cunmqr
*/
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    float error, work[1];
    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
    magma_int_t ione = 1;
    magma_int_t mm, m, n, k, size, info;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t nb, ldc, lda, lwork, lwork_max;
    magmaFloatComplex *C, *R, *A, *W, *tau;
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    // need slightly looser bound (60*eps instead of 30*eps) for some tests
    opts.tolerance = max( 60., opts.tolerance );
    float tol = opts.tolerance * lapackf77_slamch("E");
    
    // test all combinations of input parameters
    magma_side_t  side [] = { MagmaLeft,      MagmaRight   };
    magma_trans_t trans[] = { MagmaConjTrans, MagmaNoTrans };

    printf("    M     N     K   side   trans   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R||_F / ||QC||_F\n");
    printf("===============================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
      for( int iside = 0; iside < 2; ++iside ) {
      for( int itran = 0; itran < 2; ++itran ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            m = opts.msize[itest];
            n = opts.nsize[itest];
            k = opts.ksize[itest];
            nb  = magma_get_cgeqrf_nb( m );
            ldc = m;
            // A is m x k (left) or n x k (right)
            mm = (side[iside] == MagmaLeft ? m : n);
            lda = mm;
            gflops = FLOPS_CUNMQR( m, n, k, side[iside] ) / 1e9;
            
            if ( side[iside] == MagmaLeft && m < k ) {
                printf( "%5d %5d %5d   %4c   %5c   skipping because side=left  and m < k\n",
                        (int) m, (int) n, (int) k,
                        lapacke_side_const( side[iside] ),
                        lapacke_trans_const( trans[itran] ) );
                continue;
            }
            if ( side[iside] == MagmaRight && n < k ) {
                printf( "%5d %5d %5d   %4c   %5c   skipping because side=right and n < k\n",
                        (int) m, (int) n, (int) k,
                        lapacke_side_const( side[iside] ),
                        lapacke_trans_const( trans[itran] ) );
                continue;
            }
            
            // need at least 2*nb*nb for geqrf
            lwork_max = max( max( m*nb, n*nb ), 2*nb*nb );
            
            TESTING_MALLOC_CPU( C,   magmaFloatComplex, ldc*n );
            TESTING_MALLOC_CPU( R,   magmaFloatComplex, ldc*n );
            TESTING_MALLOC_CPU( A,   magmaFloatComplex, lda*k );
            TESTING_MALLOC_CPU( W,   magmaFloatComplex, lwork_max );
            TESTING_MALLOC_CPU( tau, magmaFloatComplex, k );
            
            // C is full, m x n
            size = ldc*n;
            lapackf77_clarnv( &ione, ISEED, &size, C );
            lapackf77_clacpy( "Full", &m, &n, C, &ldc, R, &ldc );
            
            size = lda*k;
            lapackf77_clarnv( &ione, ISEED, &size, A );
            
            // compute QR factorization to get Householder vectors in A, tau
            magma_cgeqrf( mm, k, A, lda, tau, W, lwork_max, &info );
            if (info != 0)
                printf("magma_cgeqrf returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            cpu_time = magma_wtime();
            lapackf77_cunmqr( lapack_side_const( side[iside] ), lapack_trans_const( trans[itran] ),
                              &m, &n, &k,
                              A, &lda, tau, C, &ldc, W, &lwork_max, &info );
            cpu_time = magma_wtime() - cpu_time;
            cpu_perf = gflops / cpu_time;
            if (info != 0)
                printf("lapackf77_cunmqr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            // query for workspace size
            lwork = -1;
            magma_cunmqr( side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, W, lwork, &info );
            if (info != 0)
                printf("magma_cunmqr (lwork query) returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            lwork = (magma_int_t) MAGMA_C_REAL( W[0] );
            if ( lwork < 0 || lwork > lwork_max ) {
                printf("optimal lwork %d > lwork_max %d\n", (int) lwork, (int) lwork_max );
                lwork = lwork_max;
            }
            
            gpu_time = magma_wtime();
            magma_cunmqr( side[iside], trans[itran],
                          m, n, k,
                          A, lda, tau, R, ldc, W, lwork, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_cunmqr returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
                        
            /* =====================================================================
               compute relative error |QC_magma - QC_lapack| / |QC_lapack|
               =================================================================== */
            error = lapackf77_clange( "Fro", &m, &n, C, &ldc, work );
            size = ldc*n;
            blasf77_caxpy( &size, &c_neg_one, C, &ione, R, &ione );
            error = lapackf77_clange( "Fro", &m, &n, R, &ldc, work ) / error;
            
            printf( "%5d %5d %5d   %4c   %5c   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e   %s\n",
                    (int) m, (int) n, (int) k,
                    lapacke_side_const( side[iside] ),
                    lapacke_trans_const( trans[itran] ),
                    cpu_perf, cpu_time, gpu_perf, gpu_time,
                    error, (error < tol ? "ok" : "failed") );
            status += ! (error < tol);
            
            TESTING_FREE_CPU( C );
            TESTING_FREE_CPU( R );
            TESTING_FREE_CPU( A );
            TESTING_FREE_CPU( W );
            TESTING_FREE_CPU( tau );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }}  // end iside, itran
      printf( "\n" );
    }
    
    TESTING_FINALIZE();
    return status;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zhetrd_he2hb
*/
int main( int argc, char** argv)
{
    TESTING_INIT_MGPU();

    real_Double_t gflops, gpu_time, gpu_perf;
    magmaDoubleComplex *h_A, *h_R, *h_work;
    magmaDoubleComplex *tau;
    double *D, *E;
    magma_int_t N, n2, lda, ldda, lwork, ldt, info, nstream;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    // TODO add these options to parse_opts
    magma_int_t NE      = 0;
    magma_int_t distblk = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    magma_int_t WANTZ = (opts.jobz == MagmaVec);
    double tol = opts.tolerance * lapackf77_dlamch("E");
    if (opts.nb == 0)
        opts.nb = 64; //magma_get_zhetrd_he2hb_nb(N);

    if (NE < 1)
        NE = N; //64; //magma_get_zhetrd_he2hb_nb(N);

    nstream = max(3, opts.ngpu+2);
    magma_queue_t streams[MagmaMaxGPUs][20];
    magmaDoubleComplex *da[MagmaMaxGPUs], *dT1[MagmaMaxGPUs];
    if ((distblk == 0) || (distblk < opts.nb))
        distblk = max(256, opts.nb);
    printf("voici ngpu %d distblk %d NB %d nstream %d\n ",
           (int) opts.ngpu, (int) distblk, (int) opts.nb, (int) nstream);

    for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) {
        magma_setdevice( dev );
        for( int i = 0; i < nstream; ++i ) {
            magma_queue_create( &streams[dev][i] );
        }
    }
    magma_setdevice( 0 );

    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N     = opts.nsize[itest];
            lda   = N;
            ldt   = N;
            ldda  = ((N+31)/32)*32;
            n2    = N*lda;
            /* We suppose the magma NB is bigger than lapack NB */
            lwork = N*opts.nb;
            //gflops = ....?

            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( tau,    magmaDoubleComplex, N-1   );

            TESTING_MALLOC_PIN( h_A,    magmaDoubleComplex, lda*N );
            TESTING_MALLOC_PIN( h_R,    magmaDoubleComplex, lda*N );
            TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork );
            TESTING_MALLOC_PIN( D, double, N );
            TESTING_MALLOC_PIN( E, double, N );

            for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) {
                magma_int_t mlocal = ((N / distblk) / opts.ngpu + 1) * distblk;
                magma_setdevice( dev );
                TESTING_MALLOC_DEV( da[dev],  magmaDoubleComplex, ldda*mlocal );
                TESTING_MALLOC_DEV( dT1[dev], magmaDoubleComplex, N*opts.nb        );
            }
            
            /* ====================================================================
               Initialize the matrix
               =================================================================== */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            magma_zmake_hermitian( N, h_A, lda );

            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );

            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            /* Copy the matrix to the GPU */
            magma_zsetmatrix_1D_col_bcyclic( N, N, h_R, lda, da, ldda, opts.ngpu, distblk);
            //magmaDoubleComplex *dabis;
            //TESTING_MALLOC_DEV( dabis,  magmaDoubleComplex, ldda*N );
            //magma_zsetmatrix(N, N, h_R, lda, dabis, ldda);

            for (int count=0; count < 1; ++count) {
                magma_setdevice(0);
                gpu_time = magma_wtime();
                if (opts.version == 30) {
                    magma_zhetrd_he2hb_mgpu_spec(
                        opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork,
                        da, ldda, dT1, opts.nb, opts.ngpu, distblk,
                        streams, nstream, opts.nthread, &info);
                } else {
                    nstream = 3;
                    magma_zhetrd_he2hb_mgpu(
                        opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork,
                        da, ldda, dT1, opts.nb, opts.ngpu, distblk,
                        streams, nstream, opts.nthread, &info);
                }
                // magma_zhetrd_he2hb(opts.uplo, N, opts.nb, h_R, lda, tau, h_work, lwork, dT1[0], &info);
                gpu_time = magma_wtime() - gpu_time;
                printf("  Finish BAND  N %d  NB %d  dist %d  ngpu %d version %d timing= %f\n",
                       N, opts.nb, distblk, opts.ngpu, opts.version, gpu_time);
            }
            magma_setdevice(0);

            for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) {
                magma_setdevice(dev);
                cudaDeviceSynchronize();
            }
            magma_setdevice(0);
            magmablasSetKernelStream( NULL );

            // todo neither of these is declared in headers
            // magma_zhetrd_bhe2trc_v5(opts.nthread, WANTZ, opts.uplo, NE, N, opts.nb, h_R, lda, D, E, dT1[0], ldt);
            // magma_zhetrd_bhe2trc(opts.nthread, WANTZ, opts.uplo, NE, N, opts.nb, h_R, lda, D, E, dT1[0], ldt);
            
            // todo where is this timer started?
            // gpu_time = magma_wtime() - gpu_time;
            
            // todo what are the gflops?
            gpu_perf = gflops / gpu_time;
            
            if (info != 0)
                printf("magma_zhetrd_he2hb returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* =====================================================================
               Print performance and error.
               =================================================================== */
#if defined(CHECKEIG)
#if defined(PRECISION_z) || defined(PRECISION_d)
            if ( opts.check ) {
                printf("  Total N %5d  flops %6.2f  timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time );
                char JOBZ;
                if (WANTZ == 0)
                    JOBZ = 'N';
                else
                    JOBZ = 'V';
                double nrmI=0.0, nrm1=0.0, nrm2=0.0;
                int    lwork2 = 256*N;
                magmaDoubleComplex *work2, *AINIT;
                double *rwork2, *D2;
                // TODO free this memory !
                magma_zmalloc_cpu( &work2, lwork2 );
                magma_dmalloc_cpu( &rwork2, N );
                magma_dmalloc_cpu( &D2, N );
                magma_zmalloc_cpu( &AINIT, N*lda );
                memcpy(AINIT, h_A, N*lda*sizeof(magmaDoubleComplex));
                
                /* =====================================================================
                   Performs operation using LAPACK
                   =================================================================== */
                cpu_time = magma_wtime();
                int nt = min(12, opts.nthread);

                #if defined(USEMKL)
                mkl_set_num_threads(nt);
                #endif
                #if defined(USEACML)
                omp_set_num_threads(nt);
                #endif

                #if defined(PRECISION_z) || defined (PRECISION_c)
                lapackf77_zheev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2, rwork2, &info );
                #else
                lapackf77_dsyev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2, &info );
                #endif
                ///* call eigensolver for our resulting tridiag [D E] and for Q */
                //dstedc_withZ('V', N, D, E, h_R, lda);
                ////dsterf_( &N, D, E, &info);
                
                cpu_time = magma_wtime() - cpu_time;
                printf("  Finish CHECK - EIGEN   timing= %f  threads %d\n", cpu_time, nt);

                /* compare result */
                cmp_vals(N, D2, D, &nrmI, &nrm1, &nrm2);

                magmaDoubleComplex *WORKAJETER;
                double *RWORKAJETER, *RESU;
                // TODO free this memory !
                magma_zmalloc_cpu( &WORKAJETER, (2* N * N + N)  );
                magma_dmalloc_cpu( &RWORKAJETER, N  );
                magma_dmalloc_cpu( &RESU, 10 );
                int MATYPE;
                memset(RESU, 0, 10*sizeof(double));

                MATYPE=3;
                double NOTHING=0.0;
                cpu_time = magma_wtime();
                // check results
                zcheck_eig_( lapack_vec_const(opts.jobz), &MATYPE, &N, &opts.nb,
                             AINIT, &lda, &NOTHING, &NOTHING, D2, D,
                             h_R, &lda, WORKAJETER, RWORKAJETER, RESU );
                cpu_time = magma_wtime() - cpu_time;
                printf("  Finish CHECK - results timing= %f\n", cpu_time);
                #if defined(USEMKL)
                mkl_set_num_threads(1);
                #endif
                #if defined(USEACML)
                omp_set_num_threads(1);
                #endif

                printf("\n");
                printf(" ================================================================================================================\n");
                printf("   ==> INFO voici  threads=%d    N=%d    NB=%d   WANTZ=%d\n", (int) opts.nthread, (int) N, (int) opts.nb, (int) WANTZ);
                printf(" ================================================================================================================\n");
                printf("            DSBTRD                : %15s \n", "STATblgv9withQ    ");
                printf(" ================================================================================================================\n");
                if (WANTZ > 0)
                    printf(" | A - U S U' | / ( |A| n ulp )   : %15.3E   \n", RESU[0]);
                if (WANTZ > 0)
                    printf(" | I - U U' | / ( n ulp )         : %15.3E   \n", RESU[1]);
                printf(" | D1 - EVEIGS | / (|D| ulp)      : %15.3E   \n",  RESU[2]);
                printf(" max | D1 - EVEIGS |              : %15.3E   \n",  RESU[6]);
                printf(" ================================================================================================================\n\n\n");

                printf(" ****************************************************************************************************************\n");
                printf(" * Hello here are the norm  Infinite (max)=%8.2e  norm one (sum)=%8.2e   norm2(sqrt)=%8.2e *\n", nrmI, nrm1, nrm2);
                printf(" ****************************************************************************************************************\n\n");
            }
#endif  // PRECISION_z || PRECISION_d
#endif  // CHECKEIG

            printf("  Total N %5d  flops %6.2f        timing %6.2f seconds\n", (int) N, 0.0, gpu_time );
            printf("============================================================================\n\n\n");

            TESTING_FREE_CPU( tau    );

            TESTING_FREE_PIN( h_A    );
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            TESTING_FREE_PIN( D      );
            TESTING_FREE_PIN( E      );

            for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) {
                magma_setdevice( dev );
                TESTING_FREE_DEV( da[dev]  );
                TESTING_FREE_DEV( dT1[dev] );
            }
            magma_setdevice( 0 );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    for( magma_int_t dev = 0; dev < opts.ngpu; ++dev ) {
        for( int i = 0; i < nstream; ++i ) {
            magma_queue_destroy( streams[dev][i] );
        }
    }

    TESTING_FINALIZE_MGPU();
    return status;
}
Ejemplo n.º 13
0
int main(int ac, char **av)
{
	int lc;			/* loop counter */
	char *msg;		/* message returned from parse_opts */
	int status;

    /***************************************************************
     * parse standard options
     ***************************************************************/
	if ((msg = parse_opts(ac, av, (option_t *) NULL, NULL)) != (char *)NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
		tst_exit();
	}
#ifdef UCLINUX
	maybe_run_child(&do_child, "");
#endif

    /***************************************************************
     * perform global setup for test
     ***************************************************************/
	setup();

    /***************************************************************
     * check looping state if -c option given
     ***************************************************************/
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		/* reset Tst_count in case we are looping. */
		Tst_count = 0;

		/* make a child process so we can kill it */
		/* If we cannot fork => we cannot test kill, so break and exit */
		if ((fork_pid = FORK_OR_VFORK()) == -1) {
			tst_brkm(TBROK, cleanup,
				 "fork() Failure. errno=%d : %s",
				 errno, strerror(errno));
		}

		if (fork_pid == 0) {
			/* CHILD */
#ifdef UCLINUX
			if (self_exec(av[0], "") < 0) {
				tst_brkm(TBROK, cleanup,
					 "self_exec of child failed");
			}
#else
			do_child();
#endif
		}

		/* PARENT */
		/*
		 * Call kill(2)
		 */
		TEST(kill(fork_pid, SIGKILL));
		/* check return code */
		if (TEST_RETURN == -1) {
			TEST_ERROR_LOG(TEST_ERRNO);
			tst_resm(TFAIL,
				 "kill(%d, SIGKILL) Failed, errno=%d : %s",
				 fork_pid, TEST_ERRNO, strerror(TEST_ERRNO));
		} else {

	    /***************************************************************
	     * only perform functional verification if flag set (-f not given)
	     ***************************************************************/
			if (STD_FUNCTIONAL_TEST) {
				/* No Verification test, yet... */
				tst_resm(TPASS, "kill(%d, SIGKILL) returned %ld",
					 fork_pid, TEST_RETURN);
			}
		}

		/*
		 * wait for process to cleanup zombies.
		 *
		 */
		waitpid(0, &status, WNOHANG);

	}			/* End for TEST_LOOPING */

    /***************************************************************
     * cleanup and exit
     ***************************************************************/
	cleanup();

	return 0;
}				/* End main */
Ejemplo n.º 14
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cungqr
*/
int main( int argc, char** argv )
{
    TESTING_INIT();

    real_Double_t    gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    float           Anorm, error, work[1];
    magmaFloatComplex  c_neg_one = MAGMA_C_NEG_ONE;
    magmaFloatComplex *hA, *hR, *tau, *h_work;
    magmaFloatComplex_ptr dA, dT;
    magma_int_t m, n, k;
    magma_int_t n2, lda, ldda, lwork, min_mn, nb, info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );

    float tol = opts.tolerance * lapackf77_slamch("E");
    opts.lapack |= opts.check;  // check (-c) implies lapack (-l)
    
    printf("Running version %d; available are (specified through --version num):\n",
           (int) opts.version);
    printf("1 - uses precomputed clarft matrices (default)\n");
    printf("2 - recomputes the clarft matrices on the fly\n\n");

    printf("    m     n     k   CPU GFlop/s (sec)   GPU GFlop/s (sec)   ||R|| / ||A||\n");
    printf("=========================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            m = opts.msize[itest];
            n = opts.nsize[itest];
            k = opts.ksize[itest];
            if ( m < n || n < k ) {
                printf( "%5d %5d %5d   skipping because m < n or n < k\n", (int) m, (int) n, (int) k );
                continue;
            }
            
            lda  = m;
            ldda = ((m + 31)/32)*32;
            n2 = lda*n;
            min_mn = min(m, n);
            nb = magma_get_cgeqrf_nb( m );
            lwork  = (m + 2*n+nb)*nb;
            gflops = FLOPS_CUNGQR( m, n, k ) / 1e9;
            
            TESTING_MALLOC_PIN( h_work, magmaFloatComplex, lwork  );
            TESTING_MALLOC_PIN( hR,     magmaFloatComplex, lda*n  );
            
            TESTING_MALLOC_CPU( hA,     magmaFloatComplex, lda*n  );
            TESTING_MALLOC_CPU( tau,    magmaFloatComplex, min_mn );
            
            TESTING_MALLOC_DEV( dA,     magmaFloatComplex, ldda*n );
            TESTING_MALLOC_DEV( dT,     magmaFloatComplex, ( 2*min_mn + ((n + 31)/32)*32 )*nb );
            
            lapackf77_clarnv( &ione, ISEED, &n2, hA );
            lapackf77_clacpy( MagmaFullStr, &m, &n, hA, &lda, hR, &lda );
            
            Anorm = lapackf77_clange("f", &m, &n, hA, &lda, work );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            // first, get QR factors in both hA and hR
            // okay that magma_cgeqrf_gpu has special structure for R; R isn't used here.
            magma_csetmatrix( m, n, hA, lda, dA, ldda );
            magma_cgeqrf_gpu( m, n, dA, ldda, tau, dT, &info );
            if (info != 0)
                printf("magma_cgeqrf_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            magma_cgetmatrix( m, n, dA, ldda, hA, lda );
            lapackf77_clacpy( MagmaFullStr, &m, &n, hA, &lda, hR, &lda );
            
            gpu_time = magma_wtime();
            if (opts.version == 1)
                magma_cungqr( m, n, k, hR, lda, tau, dT, nb, &info );
            else
                magma_cungqr2(m, n, k, hR, lda, tau, &info );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_cungqr_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_cungqr( &m, &n, &k, hA, &lda, tau, h_work, &lwork, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_cungqr returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                // compute relative error |R|/|A| := |Q_magma - Q_lapack|/|A|
                blasf77_caxpy( &n2, &c_neg_one, hA, &ione, hR, &ione );
                error = lapackf77_clange("f", &m, &n, hR, &lda, work) / Anorm;
                
                bool okay = (error < tol);
                status += ! okay;
                printf("%5d %5d %5d   %7.1f (%7.2f)   %7.1f (%7.2f)   %8.2e   %s\n",
                       (int) m, (int) n, (int) k,
                       cpu_perf, cpu_time, gpu_perf, gpu_time,
                       error, (okay ? "ok" : "failed"));
            }
            else {
                printf("%5d %5d %5d     ---   (  ---  )   %7.1f (%7.2f)     ---  \n",
                       (int) m, (int) n, (int) k,
                       gpu_perf, gpu_time );
            }
            
            TESTING_FREE_PIN( h_work );
            TESTING_FREE_PIN( hR     );
            
            TESTING_FREE_CPU( hA  );
            TESTING_FREE_CPU( tau );
            
            TESTING_FREE_DEV( dA );
            TESTING_FREE_DEV( dT );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }
    
    TESTING_FINALIZE();
    return status;
}
Ejemplo n.º 15
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing magma_ssymm_mgpu
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    float c_neg_one = MAGMA_S_NEG_ONE;
    float calpha    = MAGMA_S_MAKE( 3.456, 5.678 );
    float cbeta     = MAGMA_S_MAKE( 1.234, 2.456 );
    
    real_Double_t    gflops, gpu_perf=0., cpu_perf=0., gpu_time=0., cpu_time=0.;
    real_Double_t    gpu_perf2=0., gpu_time2=0.;
    float           error=0., errorbis=0., work[1];
    float *hA, *hX, *hB, *hR;
    float *dA[MagmaMaxGPUs], *dX[MagmaMaxGPUs], *dB[MagmaMaxGPUs], *dwork[MagmaMaxGPUs], *hwork[MagmaMaxGPUs+1];
    float *dA2;
    magma_int_t M, N, size, lda, ldda, msize, nb, nstream;
    magma_int_t ione     = 1;
    magma_int_t iseed[4] = {0,0,0,1};
    magma_int_t status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    float tol = opts.tolerance * lapackf77_slamch("E");
    
    // default values
    nb      = (opts.nb      > 0 ? opts.nb      : 64);
    nstream = (opts.nstream > 0 ? opts.nstream :  2);
    
    magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2];
    magma_int_t nbcmplx = 0;
    magma_buildconnection_mgpu(gnode, &nbcmplx, opts.ngpu);
    printf("Initializing communication pattern... GPU-ncmplx %d\n\n", (int) nbcmplx);

    for (int i=0; i < nbcmplx; ++i) {
        int myngpu = gnode[i][MagmaMaxGPUs];
        printf("cmplx %d has %d gpu ", i, myngpu);
        for(int j=0; j < myngpu; ++j)
            printf("  %d", (int) gnode[i][j]);
        printf("\n");
    }

    magma_int_t nbevents = 2;
    magma_queue_t streams[MagmaMaxGPUs][20];
    magma_event_t redevents[MagmaMaxGPUs][20];
    magma_event_t redevents2[MagmaMaxGPUs][MagmaMaxGPUs*MagmaMaxGPUs+10];
    for( int d = 0; d < opts.ngpu; ++d ) {
        for( magma_int_t i = 0; i < nstream; ++i ) {
            magma_queue_create( &streams[d][i] );
        }
        for( magma_int_t i = 0; i < nbevents; ++i ) {
            cudaEventCreateWithFlags(&redevents[d][i],  cudaEventDisableTiming);
            cudaEventCreateWithFlags(&redevents2[d][i], cudaEventDisableTiming);
        }
    }

    printf( "nb %d, ngpu %d, nstream %d version %d\n", (int) nb, (int) opts.ngpu, (int) nstream, (int) opts.version );
    printf("    M     N    nb offset  CPU GFlop/s (sec)   GPU GFlop/s (sec)   CUBLAS hemm (sec)   ||R|| / ||A||*||X||\n");
    printf("=========================================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
      M = opts.msize[itest];
      N = opts.nsize[itest];
      for( int offset = 0; offset < N; offset += min(N,nb) ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            msize = M - offset;
            lda   = M;
            ldda  = ((M + 31)/32)*32;
            size  = lda*M;
            gflops = FLOPS_SSYMM( MagmaLeft, (float)msize, (float)N ) / 1e9;
            
            magma_int_t dworksiz = ldda*N*3;
            magma_int_t hworksiz = lda*N;
            
            TESTING_MALLOC_CPU( hA, float, lda*M );
            TESTING_MALLOC_CPU( hX, float, lda*N );
            TESTING_MALLOC_CPU( hB, float, lda*N );
            
            TESTING_MALLOC_PIN( hR, float, lda*N );

            for( int d = 0; d < opts.ngpu; ++d ) {
                magma_int_t mlocal = ((M / nb) / opts.ngpu + 1) * nb;
                magma_setdevice( d );
                TESTING_MALLOC_DEV( dA[d],    float, ldda*mlocal );
                TESTING_MALLOC_DEV( dX[d],    float, ldda*N      );
                TESTING_MALLOC_DEV( dB[d],    float, ldda*N      );
                TESTING_MALLOC_DEV( dwork[d], float, dworksiz    );
                
                TESTING_MALLOC_PIN( hwork[d], float, hworksiz    );
            }
            TESTING_MALLOC_PIN( hwork[opts.ngpu], float, lda*N );
        
            if ( opts.check ) {
                magma_setdevice( 0 );
                TESTING_MALLOC_DEV( dA2, float, ldda*M );
            }

            lapackf77_slarnv( &ione, iseed, &size, hA );
            magma_smake_symmetric( M, hA, lda );
            
            size = lda*N;
            lapackf77_slarnv( &ione, iseed, &size, hX );
            lapackf77_slarnv( &ione, iseed, &size, hB );
            lapackf77_slacpy( "Full", &M, &N, hB, &lda, hR, &lda );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_ssetmatrix_1D_col_bcyclic( M, M, hA, lda, dA, ldda, opts.ngpu, nb );
            for( int d = 0; d < opts.ngpu; ++d ) {
                magma_setdevice( d );
                //magmablasSetKernelStream( streams[ d ][  0 ] );
                magma_ssetmatrix( M, N, hX, lda, dX[d], ldda );
                //if (d == 0) magma_ssetmatrix( M, N, hB, lda, dB[d], ldda ); // this is wrong coz when offset != 0 the gpu who do the beta*C may be not 0 so this should be related to stdev(starting device who own i=0 first col)
                magma_ssetmatrix( M, N, hB, lda, dB[d], ldda );
            }
        
            //memset(hR, 0, lda*N*sizeof(float));
    
            //trace_init( 1, opts.ngpu, nstream, (magma_queue_t*) streams );
    
            //magma_int_t offset = 0; //nb;
    
            gpu_time = magma_sync_wtime(0);
        
            magmablas_ssymm_mgpu_com(
                MagmaLeft, MagmaLower, msize, N,
                calpha,    dA, ldda, offset,
                           dX, ldda,
                cbeta,     dB, ldda, dwork, dworksiz, hR, lda, hwork, hworksiz,
                opts.ngpu, nb, streams, nstream, redevents2, nbevents, gnode, nbcmplx);
           
            gpu_time = magma_sync_wtime(0) - gpu_time;
            gpu_perf = gflops / gpu_time;
                
            #ifdef TRACING
            char buf[80];
            snprintf( buf, sizeof(buf), "ssymm-m%d-n%d-nb%d-stream%d-ngpu%d-run%d.svg",
                      (int) M, (int) N, (int) nb, (int) nstream, (int) opts.ngpu, (int) j );
            trace_finalize( buf, "trace.css" );
            #endif
            
            /* ====================================================================
               Performs operation using CUBLAS
               =================================================================== */
            if ( opts.check && iter == 0 ) {
                magma_setdevice( 0 );
                magmablasSetKernelStream(  0  );
                magma_ssetmatrix( M, M, hA, lda, dA2, ldda );
                magma_ssetmatrix( M, N, hX, lda, dX[0], ldda );
                magma_ssetmatrix( M, N, hB, lda, dwork[0], ldda );
                
                gpu_time2 = magma_sync_wtime(0);
                magma_ssymm(
                    MagmaLeft, MagmaLower, msize, N,
                    calpha,    dA2+offset*ldda+offset, ldda,
                               dX[0],    ldda,
                    cbeta,     dwork[0], ldda );
                gpu_time2 = magma_sync_wtime(0) - gpu_time2;
                gpu_perf2 = gflops / gpu_time2;
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.check ) {
                // store ||A||*||X||
                errorbis  = lapackf77_slange("fro", &msize, &msize, hA+offset*lda+offset, &lda, work );
                errorbis *= lapackf77_slange("fro", &msize, &N, hX, &lda, work );
                
                //printf( "A =" ); magma_sprint( M, M, hA, lda );
                //printf( "X =" ); magma_sprint( M, N, hX, lda );
                //printf( "B =" ); magma_sprint( M, N, hB, lda );
                
                cpu_time = magma_wtime();
                blasf77_ssymm( "Left", "Lower", &msize, &N,
                                &calpha, hA+offset*lda+offset, &lda,
                                         hX, &lda,
                                &cbeta,  hB, &lda );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                /*
                trace_file = fopen("AJETE/C", "w");
                for (int j = 0; j < N; j++)
                    for (int i = 0; i < siz; i++)
                        fprintf(trace_file, "%10d%10d%40.30e\n", i+1, j+1, hB[j*lda+i]);
                fclose(trace_file);
                */
                magma_int_t firstprint=0;
                for(magma_int_t dev=0; dev < opts.ngpu; ++dev) {
                    magma_setdevice( dev );
                    magma_sgetmatrix( M, N, dB[dev], ldda, hR, lda );
    
                    // compute relative error ||R||/||A||*||X||, where R := B_magma - B_lapack = R - B
                    size = lda*N;
                    blasf77_saxpy( &size, &c_neg_one, hB, &ione, hR, &ione );
                    error = lapackf77_slange("fro", &msize, &N, hR, &lda, work) / errorbis;
                    
                    //printf( "R ="  ); magma_sprint( M, N, hR, lda );
                    if (firstprint == 0) {
                        printf( "%5d %5d %5d %5d   %7.1f (%7.4f)   %7.1f (%7.4f)   %7.1f (%7.4f)   %8.2e   %s\n",
                                (int) M, (int) N, (int) nb, (int) offset,
                                cpu_perf, cpu_time,
                                gpu_perf, gpu_time,
                                gpu_perf2, gpu_time2,
                                error, (error < tol ? "ok" : "failed") );
                    }
                    else {
                        printf( "%89s  %8.2e   %s\n", " ",
                                error, (error < tol ? "ok" : "failed") );
                    }
                    status += ! (error < tol);
                    firstprint =1;
                }
            } else {
                printf( "%5d %5d %5d %5d     ---   (  ---  )   %7.1f (%7.4f)     ---   (  ---  )   ---\n",
                        (int) M, (int) N, (int) nb, (int) offset,
                        gpu_perf, gpu_time );
            }
    
            TESTING_FREE_CPU( hA );
            TESTING_FREE_CPU( hX );
            TESTING_FREE_CPU( hB );
            
            TESTING_FREE_PIN( hR );
        
            for( int d = 0; d < opts.ngpu; ++d ) {
                magma_setdevice( d );
                TESTING_FREE_DEV( dA[d]    );
                TESTING_FREE_DEV( dX[d]    );
                TESTING_FREE_DEV( dB[d]    );
                TESTING_FREE_DEV( dwork[d] );
                
                TESTING_FREE_PIN( hwork[d] );
            }
            TESTING_FREE_PIN( hwork[opts.ngpu] );
        
            if ( opts.check ) {
                magma_setdevice( 0 );
                TESTING_FREE_DEV( dA2 );
            }
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
      }  // offset
      printf( "\n" );
    }

    for( int d = 0; d < opts.ngpu; ++d ) {
        magma_setdevice( d );
        for( magma_int_t i = 0; i < nstream; ++i ) {
            magma_queue_destroy( streams[d][i] );
        }
        for( magma_int_t i = 0; i < nbevents; ++i ) {
            magma_event_destroy( redevents[d][i]  );
            magma_event_destroy( redevents2[d][i] );
        }
    }
    
    TESTING_FINALIZE();
    return status;
}
Ejemplo n.º 16
0
int main(int ac, char **av)
{

	int lc, i;		/* loop counter */
	char *msg;		/* message returned from parse_opts */

	/* parse standard options */
	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	setup();

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		Tst_count = 0;

		for (i = 0; i < TST_TOTAL; i++) {

			if (testcase[i].setupfunc &&
			    testcase[i].setupfunc() == -1) {
				tst_resm(TWARN, "Failed to setup test %d."
					 " Skipping test", i);
				continue;
			} else {
				TEST(syscall(__NR_swapoff, testcase[i].path));
			}

			if (testcase[i].cleanfunc &&
			    testcase[i].cleanfunc() == -1) {
				tst_brkm(TBROK, cleanup, "cleanup failed,"
					 " quitting the test");
			}

			/* check return code */
			if ((TEST_RETURN == -1) && (TEST_ERRNO == testcase[i].
						    exp_errno)) {
				tst_resm(TPASS, "swapoff(2) expected failure;"
					 " Got errno - %s : %s",
					 testcase[i].exp_errval,
					 testcase[i].err_desc);

			} else {
				tst_resm(TFAIL, "swapoff(2) failed to produce"
					 " expected error; %d, errno"
					 ": %s and got %d",
					 testcase[i].exp_errno,
					 testcase[i].exp_errval, TEST_ERRNO);

				if ((TEST_RETURN == 0) && (i == 2)) {
					if (syscall(__NR_swapon, "./swapfile01", 0) != 0) {
						tst_brkm(TBROK, cleanup,
							 " Failed to turn on"
							 " swap file");
					}
				}
			}

			TEST_ERROR_LOG(TEST_ERRNO);
		}		/*End of TEST LOOPS */
	}

	/*Clean up and exit */
	cleanup();

	tst_exit();
}				/*End of main */
Ejemplo n.º 17
0
Archivo: stat03.c Proyecto: Nudiv/ltp
int main(int ac, char **av)
{
	struct stat stat_buf;	/* stat structure buffer */
	int lc;
	const char *msg;
	char *file_name;	/* ptr. for file name whose mode is modified */
	char *test_desc;	/* test specific error message */
	int ind;		/* counter to test different test conditions */

	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	/*
	 * Invoke setup function to call individual test setup functions
	 * to simulate test conditions.
	 */
	setup();

	/* set the expected errnos... */
	TEST_EXP_ENOS(exp_enos);

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		tst_count = 0;

		for (ind = 0; Test_cases[ind].desc != NULL; ind++) {
			file_name = Test_cases[ind].pathname;
			test_desc = Test_cases[ind].desc;

#if !defined(UCLINUX)
			if (file_name == High_address_node) {
				file_name = (char *)get_high_address();
			}
#endif

			/*
			 * Call stat(2) to test different test conditions.
			 * verify that it fails with -1 return value and
			 * sets appropriate errno.
			 */
			TEST(stat(file_name, &stat_buf));

			/* Check return code from stat(2) */
			if (TEST_RETURN == -1) {
				TEST_ERROR_LOG(TEST_ERRNO);
				if (TEST_ERRNO == Test_cases[ind].exp_errno) {
					tst_resm(TPASS,
						 "stat() fails, %s, errno:%d",
						 test_desc, TEST_ERRNO);
				} else {
					tst_resm(TFAIL,
						 "stat() fails, %s, errno:%d, expected errno:%d",
						 test_desc, TEST_ERRNO,
						 Test_cases[ind].exp_errno);
				}
			} else {
				tst_resm(TFAIL,
					 "stat(2) returned %ld, expected -1, errno:%d",
					 TEST_RETURN,
					 Test_cases[ind].exp_errno);
			}
		}
		tst_count++;	/* incr TEST_LOOP counter */
	}

	/*
	 * Invoke cleanup() to delete the test directory/file(s) created
	 * in the setup().
	 */
	cleanup();
	tst_exit();

}
Ejemplo n.º 18
0
int main(int ac, char *av[])
{
	int *fildes;
	int ifile;
	char pfilname[40];
	int min;
	int serrno;

	int lc;
	const char *msg;

	ifile = -1;

	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);

	local_flag = PASSED;

	setup();

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		min = getdtablesize();	/* get number of files allowed open */

		fildes = malloc((min + 10) * sizeof(int));
		if (fildes == NULL)
			tst_brkm(TBROK | TERRNO, cleanup, "malloc error");

		sprintf(pfilname, "./dup205.%d\n", getpid());
		unlink(pfilname);
		serrno = 0;
		if ((fildes[0] = creat(pfilname, 0666)) == -1)
			tst_brkm(TBROK | TERRNO, cleanup, "creat failed");
		else {
			fildes[fildes[0]] = fildes[0];
			for (ifile = fildes[0] + 1; ifile < min + 10; ifile++) {
				if ((fildes[ifile] = dup2(fildes[ifile - 1],
							  ifile)) == -1) {
					serrno = errno;
					break;
				} else {
					if (fildes[ifile] != ifile) {
						tst_brkm(TFAIL, cleanup,
							 "got wrong descriptor "
							 "number back (%d != %d)",
							 fildes[ifile], ifile);
					}
				}
			}	/* end for */
			if (ifile < min) {
				tst_resm(TFAIL, "Not enough files duped");
				local_flag = FAILED;
			} else if (ifile > min) {
				tst_resm(TFAIL, "Too many files duped");
				local_flag = FAILED;
			}
			if (serrno != EBADF && serrno != EMFILE &&
			    serrno != EINVAL) {
				tst_resm(TFAIL, "bad errno on dup2 failure");
				local_flag = FAILED;
			}
		}
		unlink(pfilname);
		for (ifile = fildes[0]; ifile < min + 10; ifile++)
			close(fildes[ifile]);
		if (local_flag == PASSED) {
			tst_resm(TPASS, "Test passed.");
		} else {
			tst_resm(TFAIL, "Test failed.");
		}

	}
	cleanup();
	tst_exit();
}
Ejemplo n.º 19
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing ssygvdx
*/
int main( int argc, char** argv)
{

    TESTING_INIT();

    real_Double_t gpu_time;

    float *h_A, *h_R, *h_work;

#if defined(PRECISION_z) || defined(PRECISION_c)
    float *rwork;
    magma_int_t lrwork;
#endif

    /* Matrix size */
    float *w1, *w2;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};;
    magma_int_t info_ortho     = 0;
    magma_int_t info_solution  = 0;
    magma_int_t info_reduction = 0;

    magma_timestr_t start, end;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    magma_int_t ngpu = opts.ngpu;
    char jobz = opts.jobz;
    magma_int_t checkres = opts.check;

    char range = 'A';
    char uplo = opts.uplo;
    magma_int_t itype = opts.itype;
    float f = opts.fraction;

    if (f != 1)
        range='I';

    if ( checkres && jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        jobz = MagmaVec;
    }

    printf("using: itype = %d, jobz = %c, range = %c, uplo = %c, checkres = %d, fraction = %6.4f\n",
           (int) itype, jobz, range, uplo, (int) checkres, f);

    printf("  N     M     GPU Time(s) \n");
    printf("==========================\n");
    magma_int_t threads = magma_get_numthreads();
    for( magma_int_t i = 0; i < opts.ntest; ++i ) {
        for( magma_int_t iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            n2     = N*N;
#if defined(PRECISION_z) || defined(PRECISION_c)
            lwork  = magma_sbulge_get_lq2(N, threads) + 2*N + N*N;
            lrwork = 1 + 5*N +2*N*N;
#else
            lwork  = magma_sbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N;
#endif
            liwork = 3 + 5*N;

            /* Allocate host memory for the matrix */
            TESTING_MALLOC(   h_A, float, n2);
            TESTING_MALLOC(    w1, float         ,  N);
            TESTING_MALLOC(    w2, float         ,  N);
            TESTING_HOSTALLOC(h_R, float, n2);
            TESTING_HOSTALLOC(h_work, float,  lwork);
#if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_HOSTALLOC( rwork,          float, lrwork);
#endif
            TESTING_MALLOC(    iwork,     magma_int_t, liwork);


            /* Initialize the matrix */
            lapackf77_slarnv( &ione, ISEED, &n2, h_A );
            /* Make diagonal real */
            for(int i=0; i<N; i++) {
                MAGMA_S_SET2REAL( h_A[i*N+i], MAGMA_S_REAL(h_A[i*N+i]) );
            }


            magma_int_t m1 = 0;
            float vl = 0;
            float vu = 0;
            magma_int_t il = 0;
            magma_int_t iu = 0;
            if (range == 'I'){
                il = 1;
                iu = (int) (f*N);
            }

            if(opts.warmup){
                // ==================================================================
                // Warmup using MAGMA
                // ==================================================================
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                if(ngpu==1){
                    printf("calling ssyevdx_2stage 1 GPU\n");
                    magma_ssyevdx_2stage(jobz, range, uplo, N, 
                                    h_R, N, 
                                    vl, vu, il, iu, 
                                    &m1, w1, 
                                    h_work, lwork, 
#if defined(PRECISION_z) || defined(PRECISION_c)
                                    rwork, lrwork, 
#endif         
                                    iwork, liwork, 
                                    &info);
               
                }else{
                    printf("calling ssyevdx_2stage_m %d GPU\n", (int) ngpu);
                    magma_ssyevdx_2stage_m(ngpu, jobz, range, uplo, N, 
                                    h_R, N, 
                                    vl, vu, il, iu, 
                                    &m1, w1, 
                                    h_work, lwork, 
#if defined(PRECISION_z) || defined(PRECISION_c)
                                    rwork, lrwork, 
#endif         
                                    iwork, liwork, 
                                    &info);
                }
            }


            // ===================================================================
            // Performs operation using MAGMA
            // ===================================================================
            lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
            start = get_current_time();
            if(ngpu==1){
                printf("calling ssyevdx_2stage 1 GPU\n");
                magma_ssyevdx_2stage(jobz, range, uplo, N, 
                                h_R, N, 
                                vl, vu, il, iu, 
                                &m1, w1, 
                                h_work, lwork, 
#if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork, 
#endif     
                                iwork, liwork, 
                                &info);
           
            }else{
                printf("calling ssyevdx_2stage_m %d GPU\n", (int) ngpu);
                magma_ssyevdx_2stage_m(ngpu, jobz, range, uplo, N, 
                                h_R, N, 
                                vl, vu, il, iu, 
                                &m1, w1, 
                                h_work, lwork, 
#if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork, 
#endif     
                                iwork, liwork, 
                                &info);
            }
            end = get_current_time();
            gpu_time = GetTimerValue(start,end)/1000.;

            if ( checkres ) {
                float eps   = lapackf77_slamch("E");
                printf("\n");
                printf("------ TESTS FOR MAGMA SSYEVD ROUTINE -------  \n");
                printf("        Size of the Matrix %d by %d\n", (int) N, (int) N);
                printf("\n");
                printf(" The matrix A is randomly generated for each test.\n");
                printf("============\n");
                printf(" The relative machine precision (eps) is %8.2e\n",eps);
                printf(" Computational tests pass if scaled residuals are less than 60.\n");
              
                /* Check the orthogonality, reduction and the eigen solutions */
                if (jobz == MagmaVec) {
                    info_ortho = check_orthogonality(N, N, h_R, N, eps);
                    info_reduction = check_reduction(uplo, N, 1, h_A, w1, N, h_R, eps);
                }
                printf("------ CALLING LAPACK SSYEVD TO COMPUTE only eigenvalue and verify elementswise -------  \n");
                lapackf77_ssyevd("N", "L", &N, 
                                h_A, &N, w2, 
                                h_work, &lwork, 
#if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, &lrwork, 
#endif     
                                iwork, &liwork, 
                                &info);
                info_solution = check_solution(N, w2, w1, eps);
              
                if ( (info_solution == 0) & (info_ortho == 0) & (info_reduction == 0) ) {
                    printf("***************************************************\n");
                    printf(" ---- TESTING SSYEVD ...................... PASSED !\n");
                    printf("***************************************************\n");
                }
                else {
                    printf("************************************************\n");
                    printf(" - TESTING SSYEVD ... FAILED !\n");
                    printf("************************************************\n");
                }
            }


            /* =====================================================================
             Print execution time
             =================================================================== */
            printf("%5d %5d     %6.2f\n",
                   (int) N, (int) m1, gpu_time);

            TESTING_FREE(       h_A);
            TESTING_FREE(        w1);
            TESTING_FREE(        w2);
#if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_HOSTFREE( rwork);
#endif
            TESTING_FREE(     iwork);
            TESTING_HOSTFREE(h_work);
            TESTING_HOSTFREE(   h_R);
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    /* Shutdown */
    TESTING_FINALIZE();

    return 0;
}
Ejemplo n.º 20
0
int main(int ac, char **av)
{
	int lc;			/* loop counter */
	char *msg;		/* message returned from parse_opts */
	pid_t cpid;		/* Child process id */
	int status;		/* child exit status */

	/* Parse standard options given to run the test. */
	msg = parse_opts(ac, av, (option_t *) NULL, NULL);
	if (msg != (char *)NULL) {
		tst_brkm(TBROK, tst_exit, "OPTION PARSING ERROR - %s", msg);
	}
#ifdef UCLINUX
	maybe_run_child(&do_child, "dddd", &timereq.tv_sec, &timereq.tv_nsec,
			&timerem.tv_sec, &timerem.tv_nsec);
#endif

	/* Perform global setup for test */
	setup();

	/* Check looping state if -i option given */
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		/* Reset Tst_count in case we are looping. */
		Tst_count = 0;

		/*
		 * Creat a child process and suspend its
		 * execution using nanosleep()
		 */
		if ((cpid = FORK_OR_VFORK()) == -1) {
			tst_brkm(TBROK, cleanup,
				 "fork() failed to create child process");
		}

		if (cpid == 0) {	/* Child process */
#ifdef UCLINUX
			if (self_exec(av[0], "dddd",
				      timereq.tv_sec, timereq.tv_nsec,
				      timerem.tv_sec, timerem.tv_nsec) < 0) {
				tst_brkm(TBROK, cleanup, "self_exec failed");
			}
#else
			do_child();
#endif
		}

		/* wait for child to time slot for execution */
		sleep(1);

		/* Now send signal to child */
		if (kill(cpid, SIGINT) < 0) {
			tst_brkm(TBROK, cleanup,
				 "kill() fails send signal to child");
		}

		/* Wait for child to execute */
		wait(&status);
		if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
			tst_resm(TPASS, "Functionality of nanosleep() is "
					"correct");
		} else {
			tst_resm(TFAIL, "child process exited abnormally; "
					"status = %d", status);
		}
	}			/* End for TEST_LOOPING */

	/* Call cleanup() to undo setup done for the test. */
	cleanup();

	 /*NOTREACHED*/ return 0;

}				/* End main */
Ejemplo n.º 21
0
int main(int ac, char **av)
{
	int lc;			/* loop counter */
	char *msg;		/* message returned from parse_opts */

	struct flock flock;

	/* parse standard options */
	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
	}

	setup();

	/* check for looping state if -i option is given */
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		/* reset Tst_count in case we are looping */
		Tst_count = 0;

/* //block1: */
		tst_resm(TINFO, "Enter block 1");
		tst_resm(TINFO, "Test for errno EINVAL");
		fail = 0;

		if (fcntl(1, F_BADCMD, 1) != -1) {
			tst_resm(TFAIL, "fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EINVAL) {
			tst_resm(TFAIL, "Expected EINVAL got %d", errno);
			fail = 1;
		}

		if (fail) {
			tst_resm(TINFO, "block 1 FAILED");
		} else {
			tst_resm(TINFO, "block 1 PASSED");
		}
		tst_resm(TINFO, "Exit block 1");

/* //block2: */
#ifndef UCLINUX
		/* Skip since uClinux does not implement memory protection */
		tst_resm(TINFO, "Enter block 2");
		tst_resm(TINFO, "Test for errno EFAULT");
		fail = 0;

		/* case 1: F_SETLK */
		if (fcntl(1, F_SETLK, (void *)-1) != -1) {
			tst_resm(TFAIL, "F_SETLK: fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EFAULT) {
			tst_resm(TFAIL, "F_SETLK: Expected EFAULT got %d",
				 errno);
			fail = 1;
		}

		/* case 2: F_SETLKW */
		if (fcntl(1, F_SETLKW, (void *)-1) != -1) {
			tst_resm(TFAIL, "F_SETLKW: fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EFAULT) {
			tst_resm(TFAIL, "F_SETLKW: Expected EFAULT got %d",
				 errno);
			fail = 1;
		}

		/* case 3: F_GETLK */
		if (fcntl(1, F_GETLK, (void *)-1) != -1) {
			tst_resm(TFAIL, "F_GETLK: fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EFAULT) {
			tst_resm(TFAIL, "F_GETLK: Expected EFAULT got %d",
				 errno);
			fail = 1;
		}

		if (fail) {
			tst_resm(TINFO, "blcok 2 FAILED");
		} else {
			tst_resm(TINFO, "block 2 PASSED");
		}
		tst_resm(TINFO, "Exit block 2");
#else
		tst_resm(TINFO, "Skip block 2 on uClinux");
#endif

/* //block3: */
		tst_resm(TINFO, "Enter block 3");
		tst_resm(TINFO, "Test for errno EINVAL");
		fail = 0;

		flock.l_whence = -1;
		flock.l_type = F_WRLCK;
		flock.l_start = 0L;
		flock.l_len = 0L;

		if (fcntl(1, F_SETLK, &flock) != -1) {
			tst_resm(TFAIL, "fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EINVAL) {
			tst_resm(TFAIL, "Expected EINVAL, got %d", errno);
			fail = 1;
		}

		if (fail) {
			tst_resm(TINFO, "block 3 FAILED");
		} else {
			tst_resm(TINFO, "block 3 PASSED");
		}
		tst_resm(TINFO, "Exit block 3");

/* //block4: */
		tst_resm(TINFO, "Enter block 4");
		tst_resm(TINFO, "Test for errno EBADF");
		fail = 0;

		if (fcntl(-1, F_GETLK, &flock) != -1) {
			tst_resm(TFAIL, "fcntl(2) failed to FAIL");
			fail = 1;
		} else if (errno != EBADF) {
			tst_resm(TFAIL, "Expected EBADF, got %d", errno);
			fail = 1;
		}

		if (fail) {
			tst_resm(TINFO, "block 4 FAILED");
		} else {
			tst_resm(TINFO, "block 4 PASSED");
		}
		tst_resm(TINFO, "Exit block 4");
	}
	cleanup();
	tst_exit();
}
Ejemplo n.º 22
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zgemm
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, magma_perf, magma_time, dev_perf, dev_time, cpu_perf, cpu_time;
    double          magma_error, dev_error, Cnorm, work[1];
    magma_int_t M, N, K;
    magma_int_t Am, An, Bm, Bn;
    magma_int_t sizeA, sizeB, sizeC;
    magma_int_t lda, ldb, ldc, ldda, lddb, lddc;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;
    
    magmaDoubleComplex *h_A, *h_B, *h_C, *h_Cmagma, *h_Cdev;
    magmaDoubleComplex_ptr d_A, d_B, d_C;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex alpha = MAGMA_Z_MAKE(  0.29, -0.86 );
    magmaDoubleComplex beta  = MAGMA_Z_MAKE( -0.48,  0.38 );
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");

    #ifdef HAVE_CUBLAS
        // for CUDA, we can check MAGMA vs. CUBLAS, without running LAPACK
        printf("If running lapack (option --lapack), MAGMA and %s error are both computed\n"
               "relative to CPU BLAS result. Else, MAGMA error is computed relative to %s result.\n\n",
                g_platform_str, g_platform_str );
        printf("transA = %s, transB = %s\n",
               lapack_trans_const(opts.transA),
               lapack_trans_const(opts.transB) );
        printf("    M     N     K   MAGMA Gflop/s (ms)  %s Gflop/s (ms)   CPU Gflop/s (ms)  MAGMA error  %s error\n",
                g_platform_str, g_platform_str );
    #else
        // for others, we need LAPACK for check
        opts.lapack |= opts.check;  // check (-c) implies lapack (-l)
        printf("transA = %s, transB = %s\n",
               lapack_trans_const(opts.transA),
               lapack_trans_const(opts.transB) );
        printf("    M     N     K   %s Gflop/s (ms)   CPU Gflop/s (ms)  %s error\n",
                g_platform_str, g_platform_str );
    #endif
    printf("=========================================================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M = opts.msize[itest];
            N = opts.nsize[itest];
            K = opts.ksize[itest];
            gflops = FLOPS_ZGEMM( M, N, K ) / 1e9;

            if ( opts.transA == MagmaNoTrans ) {
                lda = Am = M;
                An = K;
            } else {
                lda = Am = K;
                An = M;
            }
            
            if ( opts.transB == MagmaNoTrans ) {
                ldb = Bm = K;
                Bn = N;
            } else {
                ldb = Bm = N;
                Bn = K;
            }
            ldc = M;
            
            ldda = ((lda+31)/32)*32;
            lddb = ((ldb+31)/32)*32;
            lddc = ((ldc+31)/32)*32;
            
            sizeA = lda*An;
            sizeB = ldb*Bn;
            sizeC = ldc*N;
            
            TESTING_MALLOC_CPU( h_A,       magmaDoubleComplex, lda*An );
            TESTING_MALLOC_CPU( h_B,       magmaDoubleComplex, ldb*Bn );
            TESTING_MALLOC_CPU( h_C,       magmaDoubleComplex, ldc*N  );
            TESTING_MALLOC_CPU( h_Cmagma,  magmaDoubleComplex, ldc*N  );
            TESTING_MALLOC_CPU( h_Cdev,    magmaDoubleComplex, ldc*N  );
            
            TESTING_MALLOC_DEV( d_A, magmaDoubleComplex, ldda*An );
            TESTING_MALLOC_DEV( d_B, magmaDoubleComplex, lddb*Bn );
            TESTING_MALLOC_DEV( d_C, magmaDoubleComplex, lddc*N  );
            
            /* Initialize the matrices */
            lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
            lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
            lapackf77_zlarnv( &ione, ISEED, &sizeC, h_C );
            
            magma_zsetmatrix( Am, An, h_A, lda, d_A, ldda );
            magma_zsetmatrix( Bm, Bn, h_B, ldb, d_B, lddb );
            
            /* =====================================================================
               Performs operation using MAGMABLAS (currently only with CUDA)
               =================================================================== */
            #ifdef HAVE_CUBLAS
                magma_zsetmatrix( M, N, h_C, ldc, d_C, lddc );
                
                magma_time = magma_sync_wtime( NULL );
                magmablas_zgemm( opts.transA, opts.transB, M, N, K,
                                 alpha, d_A, ldda,
                                        d_B, lddb,
                                 beta,  d_C, lddc );
                magma_time = magma_sync_wtime( NULL ) - magma_time;
                magma_perf = gflops / magma_time;
                
                magma_zgetmatrix( M, N, d_C, lddc, h_Cmagma, ldc );
            #endif
            
            /* =====================================================================
               Performs operation using CUBLAS / clBLAS / Xeon Phi MKL
               =================================================================== */
            magma_zsetmatrix( M, N, h_C, ldc, d_C, lddc );
            
            #ifdef HAVE_CUBLAS
                dev_time = magma_sync_wtime( NULL );
                cublasZgemm( opts.handle, cublas_trans_const(opts.transA), cublas_trans_const(opts.transB), M, N, K,
                             &alpha, d_A, ldda,
                                     d_B, lddb,
                             &beta,  d_C, lddc );
                dev_time = magma_sync_wtime( NULL ) - dev_time;
            #else
                dev_time = magma_sync_wtime( opts.queue );
                magma_zgemm( opts.transA, opts.transB, M, N, K,
                             alpha, d_A, 0, ldda,
                                    d_B, 0, lddb,
                             beta,  d_C, 0, lddc, opts.queue );
                dev_time = magma_sync_wtime( opts.queue ) - dev_time;
            #endif
            dev_perf = gflops / dev_time;
            
            magma_zgetmatrix( M, N, d_C, lddc, h_Cdev, ldc );
            
            /* =====================================================================
               Performs operation using CPU BLAS
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                blasf77_zgemm( lapack_trans_const(opts.transA), lapack_trans_const(opts.transB), &M, &N, &K,
                               &alpha, h_A, &lda,
                                       h_B, &ldb,
                               &beta,  h_C, &ldc );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
            }
            
            /* =====================================================================
               Check the result
               =================================================================== */
            if ( opts.lapack ) {
                // compute relative error for both magma & dev, relative to lapack,
                // |C_magma - C_lapack| / |C_lapack|
                Cnorm = lapackf77_zlange( "F", &M, &N, h_C, &ldc, work );
                
                blasf77_zaxpy( &sizeC, &c_neg_one, h_C, &ione, h_Cdev, &ione );
                dev_error = lapackf77_zlange( "F", &M, &N, h_Cdev, &ldc, work ) / Cnorm;
                
                #ifdef HAVE_CUBLAS
                    blasf77_zaxpy( &sizeC, &c_neg_one, h_C, &ione, h_Cmagma, &ione );
                    magma_error = lapackf77_zlange( "F", &M, &N, h_Cmagma, &ldc, work ) / Cnorm;
                    
                    printf("%5d %5d %5d   %7.2f (%7.2f)    %7.2f (%7.2f)   %7.2f (%7.2f)    %8.2e     %8.2e   %s\n",
                           (int) M, (int) N, (int) K,
                           magma_perf,  1000.*magma_time,
                           dev_perf,    1000.*dev_time,
                           cpu_perf,    1000.*cpu_time,
                           magma_error, dev_error,
                           (magma_error < tol && dev_error < tol ? "ok" : "failed"));
                    status += ! (magma_error < tol && dev_error < tol);
                #else
                    printf("%5d %5d %5d   %7.2f (%7.2f)   %7.2f (%7.2f)    %8.2e   %s\n",
                           (int) M, (int) N, (int) K,
                           dev_perf,    1000.*dev_time,
                           cpu_perf,    1000.*cpu_time,
                           dev_error,
                           (dev_error < tol ? "ok" : "failed"));
                    status += ! (dev_error < tol);
                #endif
            }
            else {
                #ifdef HAVE_CUBLAS
                    // compute relative error for magma, relative to dev (currently only with CUDA)
                    Cnorm = lapackf77_zlange( "F", &M, &N, h_Cdev, &ldc, work );
                    
                    blasf77_zaxpy( &sizeC, &c_neg_one, h_Cdev, &ione, h_Cmagma, &ione );
                    magma_error = lapackf77_zlange( "F", &M, &N, h_Cmagma, &ldc, work ) / Cnorm;
                    
                    printf("%5d %5d %5d   %7.2f (%7.2f)    %7.2f (%7.2f)     ---   (  ---  )    %8.2e        ---    %s\n",
                           (int) M, (int) N, (int) K,
                           magma_perf,  1000.*magma_time,
                           dev_perf,    1000.*dev_time,
                           magma_error,
                           (magma_error < tol ? "ok" : "failed"));
                    status += ! (magma_error < tol);
                #else
                    printf("%5d %5d %5d   %7.2f (%7.2f)     ---   (  ---  )       ---\n",
                           (int) M, (int) N, (int) K,
                           dev_perf,    1000.*dev_time );
                #endif
            }
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_B );
            TESTING_FREE_CPU( h_C );
            TESTING_FREE_CPU( h_Cmagma  );
            TESTING_FREE_CPU( h_Cdev    );
            
            TESTING_FREE_DEV( d_A );
            TESTING_FREE_DEV( d_B );
            TESTING_FREE_DEV( d_C );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
Ejemplo n.º 23
0
int main(int ac, char **av)
{
	int lc;
	const char *msg;
	int rval;
	pid_t pid, pid1;
	int status;
	struct passwd *ltpuser1, *ltpuser2;

	/*
	 * parse standard options
	 */
	if ((msg = parse_opts(ac, av, NULL, NULL)) != NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
	}

	/*
	 * perform global setup for test
	 */
	setup();

	/* set the expected errnos... */
	TEST_EXP_ENOS(exp_enos);

	/*
	 * check looping state if -i option given
	 */
	for (lc = 0; TEST_LOOPING(lc); lc++) {

		tst_count = 0;

		/* Initialize the test directories name */
		sprintf(tstdir1, "tstdir1.%d", getpid());
		ltpuser1 = my_getpwnam(user1name);

		if ((pid = FORK_OR_VFORK()) < 0) {
			tst_brkm(TBROK, cleanup, "fork #1 failed");
		}

		if (pid == 0) {	/* first child */
			/* set to ltpuser1 */
			rval = setreuid(ltpuser1->pw_uid, ltpuser1->pw_uid);
			if (rval < 0) {
				tst_resm(TFAIL, "setreuid failed to "
					 "to set the real uid to %d and "
					 "effective uid to %d",
					 ltpuser1->pw_uid, ltpuser1->pw_uid);
				perror("setreuid");
				exit(1);
			}
			/* create the parent directory with 0700 permits */
			if (mkdir(tstdir1, PERMS) == -1) {
				tst_resm(TFAIL, "mkdir(%s, %#o) Failed",
					 tstdir1, PERMS);
				exit(1);
			}
			/* create tstdir1 succeeded */
			exit(0);
		}
		wait(&status);
		if (WEXITSTATUS(status) != 0) {
			tst_brkm(TFAIL, cleanup,
				 "Test to check mkdir EACCES failed"
				 "in create parent directory");
		}

		sprintf(tstdir2, "%s/tst", tstdir1);
		ltpuser2 = my_getpwnam(user2name);

		if ((pid1 = FORK_OR_VFORK()) < 0) {
			tst_brkm(TBROK, cleanup, "fork #2 failed");
		}

		if (pid1 == 0) {	/* second child */
			/* set to ltpuser2 */
			rval = setreuid(ltpuser2->pw_uid, ltpuser2->pw_uid);
			if (rval < 0) {
				tst_resm(TFAIL, "setreuid failed to "
					 "to set the real uid to %d and "
					 "effective uid to %d",
					 ltpuser2->pw_uid, ltpuser2->pw_uid);
				perror("setreuid");
				exit(1);
			}
			if (mkdir(tstdir2, PERMS) != -1) {
				tst_resm(TFAIL, "mkdir(%s, %#o) unexpected "
					 "succeeded", tstdir2, PERMS);
				exit(1);
			}
			if (errno != EACCES) {
				tst_resm(TFAIL, "Expected EACCES got %d",
					 errno);
				exit(1);
			}
			/* PASS */
			exit(0);
		}
		waitpid(pid1, &status, 0);
		if (WEXITSTATUS(status) == 0) {
			tst_resm(TPASS, "Test to attempt to creat a directory "
				 "in a directory having no permissions "
				 "SUCCEEDED in setting errno to EACCES");
		} else {
			tst_resm(TFAIL, "Test to attempt to creat a directory "
				 "in a directory having no permissions FAILED");
			cleanup();
		}
	}

	/*
	 * cleanup and exit
	 */
	cleanup();
	tst_exit();

}
Ejemplo n.º 24
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zhetrd_he2hb
*/
int main( int argc, char** argv)
{
    TESTING_INIT_MGPU();

    real_Double_t    gpu_time, gpu_perf, gflops;
    magmaDoubleComplex *h_A, *h_R, *h_work, *dT1;
    magmaDoubleComplex *tau;
    double *D, *E;

    /* Matrix size */
    magma_int_t N, n2, lda, lwork, ldt, lwork0;

    magma_int_t info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};

#if defined(CHECKEIG)
#if defined(PRECISION_z)  || defined(PRECISION_d)
    magma_int_t WANTZ=0;
    magma_int_t THREADS=1;
#endif
#endif

    magma_int_t NE = 0;
    magma_int_t NB = 0;
    magma_int_t ngpu = 1;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    NB = opts.nb;
    if (NB < 1)
        NB  = 64; //64; //magma_get_zhetrd_he2hb_nb(N);

    // what is NE ?
    if (NE < 1)
        NE  = 64; //N;  //magma_get_zhetrd_he2hb_nb(N);  // N not yet initialized

    printf("  N    GPU GFlop/s   \n");
    printf("=====================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda = N;
            ldt = N;
            n2  = N*lda;
            gflops = FLOPS_ZHETRD( N ) / 1e9;
            
            /* We suppose the magma NB is bigger than lapack NB */
            lwork0 = N*NB;
        
            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( h_A,    magmaDoubleComplex, lda*N  );
            TESTING_MALLOC_CPU( tau,    magmaDoubleComplex, N-1    );
            
            TESTING_MALLOC_PIN( h_R,    magmaDoubleComplex, lda*N  );
            TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork0 );
            TESTING_MALLOC_PIN( D, double, N );
            TESTING_MALLOC_PIN( E, double, N );
            
            //TESTING_MALLOC_DEV( dT1, magmaDoubleComplex, (2*min(N,N)+(N+31)/32*32)*NB );
            TESTING_MALLOC_DEV( dT1, magmaDoubleComplex, (N*NB) );
        
            // if (WANTZ) gflops = 2.0*gflops;
    
            /* ====================================================================
               Initialize the matrix
               =================================================================== */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            magma_zmake_hermitian( N, h_A, lda );
            
            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
    
    
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            magma_device_t cdev;
            magma_getdevice( &cdev );
    
            gpu_time = magma_wtime();
            /*
            magma_zhetrd_he2hb( opts.uplo, N, NB, h_R, lda, tau, h_work, lwork0, dT1, THREADS, &info);
            tband = magma_wtime - gpu_time();
            printf("  Finish BAND  N %d  NB %d  ngpu %d timing= %f\n", N, NB, ngpu, tband);
            magma_zhetrd_bhe2trc_v5(THREADS, WANTZ, opts.uplo, NE, N, NB, h_R, lda, D, E, dT1, ldt);
            */
    
            /*
            magma_zhetrd_he2hb( opts.uplo, N, NB, h_R, lda, tau, h_work, lwork, dT1, THREADS, &info);
            tband = magma_wtime - gpu_time();
            printf("  Finish BAND  N %d  NB %d  ngpu %d timing= %f\n", N, NB, ngpu, tband);
            magma_zhetrd_bhe2trc(THREADS, WANTZ, opts.uplo, NE, N, NB, h_R, lda, D, E, dT1, ldt);
            */

            magma_range_t range = MagmaRangeAll;
            magma_int_t fraction_ev = 100;
            magma_int_t il, iu, m1;
            double vl=0., vu=0.;
    
            if (fraction_ev == 0) {
                il = N / 10;
                iu = N / 5+il;
            }
            else {
                il = 1;
                iu = (int)(fraction_ev*N);
                if (iu < 1) iu = 1;
            }
            magmaDoubleComplex *hh_work;
            magma_int_t *iwork;
            magma_int_t nb, /*lwork,*/ liwork;
            magma_int_t threads = magma_get_parallel_numthreads();
            #if defined(PRECISION_z) || defined(PRECISION_c)
                double *rwork;
                magma_int_t lrwork;
                lwork  = magma_zbulge_get_lq2(N, threads) + 2*N + N*N;
                lrwork = 1 + 5*N +2*N*N;
                TESTING_MALLOC_PIN( rwork, double, lrwork );
            #else
                lwork  = magma_zbulge_get_lq2(N, threads) + 1 + 6*N + 2*N*N;
            #endif
            liwork = 3 + 5*N;
            nb = magma_get_zhetrd_nb(N);
            TESTING_MALLOC_PIN( hh_work, magmaDoubleComplex, lwork  );
            TESTING_MALLOC_CPU( iwork,   magma_int_t,        liwork );
    
            if (ngpu == 1) {
                printf("calling zheevdx_2stage 1 GPU\n");
                magma_zheevdx_2stage( opts.jobz, range, opts.uplo, N,
                                h_R, lda,
                                vl, vu, il, iu,
                                &m1, D,
                                hh_work, lwork,
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork,
                                #endif
                                iwork, liwork,
                                &info);
    
            } else {
                printf("calling zheevdx_2stage_m %d GPU\n", (int) ngpu);
                magma_zheevdx_2stage_m(ngpu, opts.jobz, range, opts.uplo, N,
                                h_R, lda,
                                vl, vu, il, iu,
                                &m1, D,
                                hh_work, lwork,
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork,
                                #endif
                                iwork, liwork,
                                &info);
            }
    
    
            magma_setdevice( cdev );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
    
            /* =====================================================================
               Check the factorization
               =================================================================== */
            /*
            if ( opts.check ) {
                FILE        *fp ;
    
                printf("Writing input matrix in matlab_i_mat.txt ...\n");
                fp = fopen ("matlab_i_mat.txt", "w") ;
                if ( fp == NULL ) { printf("Couldn't open output file\n"); exit(1); }
    
                for (j=0; j < N; j++) {
                    for (k=0; k < N; k++) {
                        #if defined(PRECISION_z) || defined(PRECISION_c)
                        fprintf(fp, "%5d %5d %11.8f %11.8f\n", k+1, j+1,
                                h_A[k+j*lda].x, h_A[k+j*lda].y);
                        #else
                        fprintf(fp, "%5d %5d %11.8f\n", k+1, j+1, h_A[k+j*lda]);
                        #endif
                    }
                }
                fclose( fp ) ;
    
                printf("Writing output matrix in matlab_o_mat.txt ...\n");
                fp = fopen ("matlab_o_mat.txt", "w") ;
                if ( fp == NULL ) { printf("Couldn't open output file\n"); exit(1); }
    
                for (j=0; j < N; j++) {
                    for (k=0; k < N; k++) {
                        #if defined(PRECISION_z) || defined(PRECISION_c)
                        fprintf(fp, "%5d %5d %11.8f %11.8f\n", k+1, j+1,
                                h_R[k+j*lda].x, h_R[k+j*lda].y);
                        #else
                        fprintf(fp, "%5d %5d %11.8f\n", k+1, j+1, h_R[k+j*lda]);
                        #endif
                    }
                }
                fclose( fp ) ;
            }
            */
    
    
    
            /* =====================================================================
               Print performance and error.
               =================================================================== */
#if defined(CHECKEIG)
#if defined(PRECISION_z)  || defined(PRECISION_d)
            if ( opts.check ) {
                printf("  Total N %5d  gflops %6.2f  timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time );
                char JOBZ;
                if (WANTZ == 0)
                    JOBZ = 'N';
                else
                    JOBZ = 'V';
                double nrmI=0.0, nrm1=0.0, nrm2=0.0;
                int    lwork2 = 256*N;
                magmaDoubleComplex *work2, *AINIT;
                double *rwork2, *D2;
                // TODO free this memory !
                magma_zmalloc_cpu( &work2, lwork2 );
                magma_dmalloc_cpu( &rwork2, N );
                magma_dmalloc_cpu( &D2, N );
                magma_zmalloc_cpu( &AINIT, N*lda );
                memcpy(AINIT, h_A, N*lda*sizeof(magmaDoubleComplex));
                /* compute the eigenvalues using lapack routine to be able to compare to it and used as ref */
                cpu_time = magma_wtime();
                i= min(12, THREADS);
    
                #if defined(USEMKL)
                mkl_set_num_threads( i );
                #endif
                #if defined(USEACML)
                omp_set_num_threads(i);
                #endif
    
                lapackf77_zheev( "N", "L", &N, h_A, &lda, D2, work2, &lwork2,
                    #if defined(PRECISION_z) || defined (PRECISION_c)
                    rwork2,
                    #endif
                    &info );
                
                ///* call eigensolver for our resulting tridiag [D E] and for Q */
                //dstedc_withZ('V', N, D, E, h_R, lda);
                ////dsterf_( &N, D, E, &info);
                ////
                cpu_time = magma_wtime() - cpu_time;
                printf("  Finish CHECK - EIGEN   timing= %f  threads %d\n", cpu_time, i);
    
                /*
                for (i=0; i < 10; i++)
                    printf(" voici lpk D[%d] %8.2e\n", i, D2[i]);
                */
    
                //magmaDoubleComplex mydz=0.0, mydo=1.0;
                //magmaDoubleComplex *Z;
                // magma_zmalloc_cpu( &Z, N*lda );
                // dgemm_("N", "N", &N, &N, &N, &mydo, h_R, &lda, h_A, &lda, &mydz, Z, &lda);
    
    
                /* compare result */
                cmp_vals(N, D2, D, &nrmI, &nrm1, &nrm2);
    
    
                magmaDoubleComplex *WORKAJETER;
                double *RWORKAJETER, *RESU;
                // TODO free this memory !
                magma_zmalloc_cpu( &WORKAJETER, (2* N * N + N)  );
                magma_dmalloc_cpu( &RWORKAJETER, N  );
                magma_dmalloc_cpu( &RESU, 10 );
                int MATYPE;
                memset(RESU, 0, 10*sizeof(double));
    
     
                MATYPE=3;
                double NOTHING=0.0;
                cpu_time = magma_wtime();
                // check results
                zcheck_eig_(&JOBZ, &MATYPE, &N, &NB, AINIT, &lda, &NOTHING, &NOTHING, D2, D, h_R, &lda, WORKAJETER, RWORKAJETER, RESU );
                cpu_time = magma_wtime() - cpu_time;
                printf("  Finish CHECK - results timing= %f\n", cpu_time);
                #if defined(USEMKL)
                mkl_set_num_threads( 1 );
                #endif
                #if defined(USEACML)
                omp_set_num_threads(1);
                #endif
    
                printf("\n");
                printf(" ================================================================================================================\n");
                printf("   ==> INFO voici  threads=%d    N=%d    NB=%d   WANTZ=%d\n", (int) THREADS, (int) N, (int) NB, (int) WANTZ);
                printf(" ================================================================================================================\n");
                printf("            DSBTRD                : %15s \n", "STATblgv9withQ    ");
                printf(" ================================================================================================================\n");
                if (WANTZ > 0)
                    printf(" | A - U S U' | / ( |A| n ulp )   : %15.3E   \n", RESU[0]);
                if (WANTZ > 0)
                    printf(" | I - U U' | / ( n ulp )         : %15.3E   \n", RESU[1]);
                printf(" | D1 - EVEIGS | / (|D| ulp)      : %15.3E   \n",  RESU[2]);
                printf(" max | D1 - EVEIGS |              : %15.3E   \n",  RESU[6]);
                printf(" ================================================================================================================\n\n\n");
                
                printf(" ****************************************************************************************************************\n");
                printf(" * Hello here are the norm  Infinite (max)=%8.2e  norm one (sum)=%8.2e   norm2(sqrt)=%8.2e *\n", nrmI, nrm1, nrm2);
                printf(" ****************************************************************************************************************\n\n");
            }
#endif
#endif
            
            printf("  Total N %5d  gflops %6.2f        timing %6.2f seconds\n", (int) N, gpu_perf, gpu_time );
            printf("============================================================================\n\n\n");
            
            /* Memory clean up */
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( tau );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            TESTING_FREE_PIN( D      );
            TESTING_FREE_PIN( E      );
            
            TESTING_FREE_DEV( dT1 );
            
            /* TODO - not all memory has been freed inside loop */
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE_MGPU();
    return EXIT_SUCCESS;
}
Ejemplo n.º 25
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing strmm
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, cublas_perf, cublas_time, cpu_perf, cpu_time;
    float          cublas_error, Cnorm, work[1];
    magma_int_t M, N;
    magma_int_t Ak;
    magma_int_t sizeA, sizeB;
    magma_int_t lda, ldb, ldda, lddb;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    
    float *h_A, *h_B, *h_Bcublas;
    float *d_A, *d_B;
    float c_neg_one = MAGMA_S_NEG_ONE;
    float alpha = MAGMA_S_MAKE(  0.29, -0.86 );
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    printf("If running lapack (option --lapack), MAGMA and CUBLAS error are both computed\n"
           "relative to CPU BLAS result. Else, MAGMA error is computed relative to CUBLAS result.\n\n"
           "side = %c, uplo = %c, transA = %c, diag = %c \n", opts.side, opts.uplo, opts.transA, opts.diag );
    printf("    M     N   CUBLAS Gflop/s (ms)   CPU Gflop/s (ms)  CUBLAS error\n");
    printf("==================================================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M = opts.msize[i];
            N = opts.nsize[i];
            gflops = FLOPS_STRMM(opts.side, M, N) / 1e9;

            if ( opts.side == MagmaLeft ) {
                lda = M;
                Ak = M;
            } else {
                lda = N;
                Ak = N;
            }
            
            ldb = M;
            
            ldda = ((lda+31)/32)*32;
            lddb = ((ldb+31)/32)*32;
            
            sizeA = lda*Ak;
            sizeB = ldb*N;
            
            TESTING_MALLOC( h_A,  float, lda*Ak );
            TESTING_MALLOC( h_B,  float, ldb*N  );
            TESTING_MALLOC( h_Bcublas, float, ldb*N  );
            
            TESTING_DEVALLOC( d_A, float, ldda*Ak );
            TESTING_DEVALLOC( d_B, float, lddb*N  );
            
            /* Initialize the matrices */
            lapackf77_slarnv( &ione, ISEED, &sizeA, h_A );
            lapackf77_slarnv( &ione, ISEED, &sizeB, h_B );
            
            /* =====================================================================
               Performs operation using CUDA-BLAS
               =================================================================== */
            magma_ssetmatrix( Ak, Ak, h_A, lda, d_A, ldda );
            magma_ssetmatrix( M, N, h_B, ldb, d_B, lddb );
            
            cublas_time = magma_sync_wtime( NULL );
            cublasStrmm( opts.side, opts.uplo, opts.transA, opts.diag,
                         M, N, 
                         alpha, d_A, ldda,
                                d_B, lddb );
            cublas_time = magma_sync_wtime( NULL ) - cublas_time;
            cublas_perf = gflops / cublas_time;
            
            magma_sgetmatrix( M, N, d_B, lddb, h_Bcublas, ldb );
            
            /* =====================================================================
               Performs operation using CPU BLAS
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                blasf77_strmm( &opts.side, &opts.uplo, &opts.transA, &opts.diag, 
                               &M, &N,
                               &alpha, h_A, &lda,
                                       h_B, &ldb );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
            }
            
            /* =====================================================================
               Check the result
               =================================================================== */
            if ( opts.lapack ) {
                // compute relative error for both magma & cublas, relative to lapack,
                // |C_magma - C_lapack| / |C_lapack|
                Cnorm = lapackf77_slange( "M", &M, &N, h_B, &ldb, work );
                
                blasf77_saxpy( &sizeB, &c_neg_one, h_B, &ione, h_Bcublas, &ione );
                cublas_error = lapackf77_slange( "M", &M, &N, h_Bcublas, &ldb, work ) / Cnorm;
                
                printf("%5d %5d   %7.2f (%7.2f)   %7.2f (%7.2f)    %8.2e\n",
                       (int) M, (int) N,
                       cublas_perf, 1000.*cublas_time,
                       cpu_perf,    1000.*cpu_time,
                       cublas_error );
            }
            else {
                printf("%5d %5d   %7.2f (%7.2f)    ---   (  ---  )    ---     ---\n",
                       (int) M, (int) N,
                       cublas_perf, 1000.*cublas_time);
            }
            
            TESTING_FREE( h_A  );
            TESTING_FREE( h_B  );
            TESTING_FREE( h_Bcublas );
            
            TESTING_DEVFREE( d_A );
            TESTING_DEVFREE( d_B );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return 0;
}
Ejemplo n.º 26
0
int main(int ac, char **av)
{
	int lc;
	char *ptr;		/* message returned from parse_opts */

	gid_t group;

	int i;
	int entries;		/* number of group entries */

	initgroups("root", 0);
	if ((ptr = parse_opts(ac, av, NULL, NULL)) != NULL)
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", ptr);

	setup();

	for (lc = 0; TEST_LOOPING(lc); lc++) {

		Tst_count = 0;

		TEST(getgroups(-1, gidset));

		if (TEST_RETURN == 0)
			tst_resm(TFAIL, "getgroups succeeded unexpectedly");
		else if (STD_FUNCTIONAL_TEST) {
			if (errno == EINVAL)
				tst_resm(TPASS,
					 "getgroups failed as expected with EINVAL");
			else
				tst_resm(TFAIL | TTERRNO,
					 "getgroups didn't fail as expected with EINVAL");
		}

		/*
		 * Check that if ngrps is zero that the number of groups is
		 * return and the the gidset array is not modified.
		 * This is a POSIX special case.
		 */

		memset(gidset, 052, NGROUPS);
		memset(cmpset, 052, NGROUPS);

		TEST(getgroups(0, gidset));
		if (TEST_RETURN == -1)
			tst_resm(TFAIL | TTERRNO, "getgroups failed");
		else if (STD_FUNCTIONAL_TEST) {
			if (memcmp(cmpset, gidset, NGROUPS) != 0)
				tst_resm(TFAIL,
					 "getgroups modified the gidset array");
			else
				tst_resm(TPASS,
					 "getgroups did not modify the gidset "
					 "array");
		}

		/*
		 * Check to see that is -1 is returned and errno is set to
		 * EINVAL when ngroups is not big enough to hold all groups.
		 */

		if (TEST_RETURN <= 1)
			tst_resm(TCONF,
				 "getgroups returned %ld; unable to test that using ngrps >=1 but less than number of grps",
				 TEST_RETURN);
		else {
			TEST(getgroups(TEST_RETURN - 1, gidset));
			if (TEST_RETURN == -1) {
				if (STD_FUNCTIONAL_TEST) {
					if (errno == EINVAL)
						tst_resm(TPASS,
							 "getgroups failed as "
							 "expected with EINVAL");
					else
						tst_resm(TFAIL | TERRNO,
							 "getgroups didn't fail "
							 "with EINVAL");
				}
			} else
				tst_resm(TFAIL,
					 "getgroups succeeded unexpectedly with %ld",
					 TEST_RETURN);
		}

		TEST(getgroups(NGROUPS, gidset));
		if ((entries = TEST_RETURN) == -1)
			tst_resm(TFAIL | TTERRNO,
				 "getgroups failed unexpectedly");
		else if (STD_FUNCTIONAL_TEST) {

			group = getgid();

			for (i = 0; i < entries; i++)
				if (gidset[i] == group) {
					tst_resm(TPASS,
						 "getgroups(NGROUPS,gidset) "
						 "returned %d contains gid %d "
						 "(from getgid)",
						 entries, group);
					break;
				}

			if (i == entries)
				tst_resm(TFAIL,
					 "getgroups(NGROUPS,gidset) ret %d, does "
					 "not contain gid %d (from getgid)",
					 entries, group);
		}

	}
	cleanup();

	tst_exit();
}
Ejemplo n.º 27
0
int
main(int ac, char **av)
{
	int lc;			/* loop counter */
	char *msg;		/* message returned from parse_opts */
	pid_t cpid;		/* process id of the child process */
	int exit_status;	/* exit status of child process */
	sigset_t PendSig;	/* variable to hold pending signal */
    
	/* Parse standard options given to run the test. */
	msg = parse_opts(ac, av, (option_t *) NULL, NULL);
	if (msg != (char *) NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
		tst_exit();
	}

	/* Perform global setup for test */
	setup();

	/* set the expected errnos... */
	TEST_EXP_ENOS(exp_enos);

	/* Check looping state if -i option given */
	for (lc = 0; TEST_LOOPING(lc); lc++) {
		/* Reset Tst_count in case we are looping. */
		Tst_count=0;

		/* 
		 * Call vfork(2) to create a child process without
		 * fully copying the address space of parent.
		 */
		TEST(vfork());

		/* check return code of vfork() */
		if ((cpid = TEST_RETURN) == -1) {
			TEST_ERROR_LOG(TEST_ERRNO);
			tst_resm(TFAIL, "vfork() Failed, errno=%d : %s",
				 TEST_ERRNO, strerror(TEST_ERRNO));
		} else if (cpid == 0) {		/* Child process */
			/*
			 * Perform functional verification if test
			 * executed without (-f) option.
			 */
			if (STD_FUNCTIONAL_TEST) {
				/*
				 * Check whether the pending signal SIGUSR1
				 * in the parent is also pending in the child
				 * process by storing it in a variable.
				 */
				if (sigpending(&PendSig) == -1) {
					tst_resm(TFAIL, "sigpending function "
						"failed in child");
					_exit(1);
				}

				/* Check if SIGUSR1 is pending in child */
				if (sigismember(&PendSig, SIGUSR1) != 0) {
					tst_resm(TFAIL, "SIGUSR1 also pending "
						"in child process");
					_exit(1);
				}

				/*
				 * Exit with normal exit code if everything
				 * fine
				 */
				_exit(0);
			}
		} else {	/* parent process */
			/*
			 * Let the parent process wait till child completes
			 * its execution.
			 */
			wait(&exit_status);

			/* Check for the exit status of child process */
			if (WEXITSTATUS(exit_status) == 0) {
				tst_resm(TPASS, "Call to vfork() "
					"successful");
			} else if (WEXITSTATUS(exit_status) == 1) {
				tst_resm(TFAIL, \
					 "Child process exited abnormally");
			}
		}
		Tst_count++;			/* incr. TEST_LOOP counter */
	}	/* End for TEST_LOOPING */

	/* Call cleanup() to undo setup done for the test. */
	cleanup();

	return 0;
}	/* End main */
Ejemplo n.º 28
0
Archivo: kill12.c Proyecto: Nudiv/ltp
/*--------------------------------------------------------------------*/
int main(int argc, char **argv)
{
/***** BEGINNING OF MAIN. *****/
	int pid, npid;
	int nsig, exno, nexno, status;
	int ret_val = 0;
	int core;
	void chsig();

#ifdef UCLINUX
	const char *msg;

	if ((msg = parse_opts(argc, argv, NULL, NULL)) != NULL) {
		tst_brkm(TBROK, NULL, "OPTION PARSING ERROR - %s", msg);
	}

	maybe_run_child(&do_child, "dd", &temp, &sig);
#endif

	setup();
	//tempdir();            /* move to new directory */ 12/20/2003
	blenter();

	exno = 1;

	if (sigset(SIGCLD, chsig) == SIG_ERR) {
		fprintf(temp, "\tsigset failed, errno = %d\n", errno);
		fail_exit();
	}

	for (sig = 1; sig < 14; sig++) {
		fflush(temp);
		chflag = 0;

		pid = FORK_OR_VFORK();
		if (pid < 0) {
			forkfail();
		}

		if (pid == 0) {
#ifdef UCLINUX
			if (self_exec(argv[0], "dd", temp, sig) < 0) {
				tst_resm(TBROK, "self_exec FAILED - "
					 "terminating test.");
				tst_exit();
			}
#else
			do_child();
#endif
		} else {
			//fprintf(temp, "Testing signal %d\n", sig);

			while (!chflag)	/* wait for child */
				sleep(1);

			kill(pid, sig);	/* child should ignroe this sig */
			kill(pid, SIGCLD);	/* child should exit */

#ifdef BCS
			while ((npid = wait(&status)) != pid
			       || (npid == -1 && errno == EINTR)) ;
			if (npid != pid) {
				fprintf(temp,
					"wait error: wait returned wrong pid\n");
				ret_val = 1;
			}
#else
			while ((npid = waitpid(pid, &status, 0)) != -1
			       || errno == EINTR) ;
#endif

			/*
			   nsig = status & 0177;
			   core = status & 0200;
			   nexno = (status & 0xff00) >> 8;
			 */
			/*****  LTP Port        *****/
			nsig = WTERMSIG(status);
#ifdef WCOREDUMP
			core = WCOREDUMP(status);
#endif
			nexno = WIFEXITED(status);
			/*****  **      **      *****/

			/* nsig is the signal number returned by wait
			   it should be 0, except when sig = 9          */

			if ((sig == 9) && (nsig != sig)) {
				fprintf(temp, "wait error: unexpected signal"
					" returned when the signal sent was 9"
					" The status of the process is %d \n",
					status);
				ret_val = 1;
			}
			if ((sig != 9) && (nsig != 0)) {
				fprintf(temp, "wait error: unexpected signal "
					"returned, the status of the process is "
					"%d  \n", status);
				ret_val = 1;
			}

			/* nexno is the exit number returned by wait
			   it should be 1, except when sig = 9          */

			if (sig == 9)
				if (nexno != 0) {
					fprintf(temp, "signal error: unexpected"
						" exit number returned when"
						" signal sent was 9, the status"
						" of the process is %d \n",
						status);
					ret_val = 1;
				} else;
			else if (nexno != 1) {
				fprintf(temp, "signal error: unexpected exit "
					"number returned,the status of the"
					" process is %d\n", status);
				ret_val = 1;
			}
		}
	}
	if (ret_val)
		local_flag = FAILED;

/*--------------------------------------------------------------------*/
	anyfail();
	tst_exit();
}					/******** END OF MAIN. ********/
Ejemplo n.º 29
0
/**
 * trans_create_rdma - Transport method for creating atransport instance
 * @client: client instance
 * @addr: IP address string
 * @args: Mount options string
 */
static int
rdma_create_trans(struct p9_client *client, const char *addr, char *args)
{
	int err;
	struct p9_rdma_opts opts;
	struct p9_trans_rdma *rdma;
	struct rdma_conn_param conn_param;
	struct ib_qp_init_attr qp_attr;
	struct ib_device_attr devattr;

	/* Parse the transport specific mount options */
	err = parse_opts(args, &opts);
	if (err < 0)
		return err;

	/* Create and initialize the RDMA transport structure */
	rdma = alloc_rdma(&opts);
	if (!rdma)
		return -ENOMEM;

	/* Create the RDMA CM ID */
	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
	if (IS_ERR(rdma->cm_id))
		goto error;

	/* Associate the client with the transport */
	client->trans = rdma;

	/* Resolve the server's address */
	rdma->addr.sin_family = AF_INET;
	rdma->addr.sin_addr.s_addr = in_aton(addr);
	rdma->addr.sin_port = htons(opts.port);
	err = rdma_resolve_addr(rdma->cm_id, NULL,
				(struct sockaddr *)&rdma->addr,
				rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
		goto error;

	/* Resolve the route to the server */
	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
		goto error;

	/* Query the device attributes */
	err = ib_query_device(rdma->cm_id->device, &devattr);
	if (err)
		goto error;

	/* Create the Completion Queue */
	rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler,
				cq_event_handler, client,
				opts.sq_depth + opts.rq_depth + 1, 0);
	if (IS_ERR(rdma->cq))
		goto error;
	ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP);

	/* Create the Protection Domain */
	rdma->pd = ib_alloc_pd(rdma->cm_id->device);
	if (IS_ERR(rdma->pd))
		goto error;

	/* Cache the DMA lkey in the transport */
	rdma->dma_mr = NULL;
	if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
		rdma->lkey = rdma->cm_id->device->local_dma_lkey;
	else {
		rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(rdma->dma_mr))
			goto error;
		rdma->lkey = rdma->dma_mr->lkey;
	}

	/* Create the Queue Pair */
	memset(&qp_attr, 0, sizeof qp_attr);
	qp_attr.event_handler = qp_event_handler;
	qp_attr.qp_context = client;
	qp_attr.cap.max_send_wr = opts.sq_depth;
	qp_attr.cap.max_recv_wr = opts.rq_depth;
	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = rdma->cq;
	qp_attr.recv_cq = rdma->cq;
	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
	if (err)
		goto error;
	rdma->qp = rdma->cm_id->qp;

	/* Request a connection */
	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.private_data = NULL;
	conn_param.private_data_len = 0;
	conn_param.responder_resources = P9_RDMA_IRD;
	conn_param.initiator_depth = P9_RDMA_ORD;
	err = rdma_connect(rdma->cm_id, &conn_param);
	if (err)
		goto error;
	err = wait_for_completion_interruptible(&rdma->cm_done);
	if (err || (rdma->state != P9_RDMA_CONNECTED))
		goto error;

	client->status = Connected;

	return 0;

error:
	rdma_destroy_trans(rdma);
	return -ENOTCONN;
}
Ejemplo n.º 30
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zhegvdx
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time /*cpu_time*/;
    magmaDoubleComplex *h_A, *h_R, *h_B, *h_S, *h_work;
    double *w1, *w2, vl=0, vu=0;
    double result[2] = {0};
    magma_int_t *iwork;
    magma_int_t N, n2, info, il, iu, m1, m2, nb, lwork, liwork;
    magmaDoubleComplex c_zero    = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    #if defined(PRECISION_z) || defined(PRECISION_c)
    double *rwork;
    magma_int_t lrwork;
    #endif
    //double d_one         =  1.;
    //double d_ten         = 10.;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    double tol    = opts.tolerance * lapackf77_dlamch("E");
    double tolulp = opts.tolerance * lapackf77_dlamch("P");
    
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("using: itype = %d, jobz = %s, uplo = %s, check = %d, fraction = %6.4f\n",
           (int) opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo),
           (int) opts.check, opts.fraction);

    printf("    N     M   GPU Time (sec)\n");
    printf("============================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            n2     = N*N;
            nb     = magma_get_zhetrd_nb(N);
            #if defined(PRECISION_z) || defined(PRECISION_c)
            lwork  = 2*N*nb + N*N;
            lrwork = 1 + 5*N +2*N*N;
            #else
            lwork  = 1 + 6*N*nb + 2* N*N;
            #endif
            liwork = 3 + 5*N;

            if ( opts.fraction == 0 ) {
                il = N / 10;
                iu = N / 5+il;
            }
            else {
                il = 1;
                iu = (int) (opts.fraction*N);
                if (iu < 1) iu = 1;
            }

            TESTING_MALLOC_CPU( h_A,    magmaDoubleComplex, n2     );
            TESTING_MALLOC_CPU( h_B,    magmaDoubleComplex, n2     );
            TESTING_MALLOC_CPU( w1,     double,             N      );
            TESTING_MALLOC_CPU( w2,     double,             N      );
            TESTING_MALLOC_CPU( iwork,  magma_int_t,        liwork );
            
            TESTING_MALLOC_PIN( h_R,    magmaDoubleComplex, n2     );
            TESTING_MALLOC_PIN( h_S,    magmaDoubleComplex, n2     );
            TESTING_MALLOC_PIN( h_work, magmaDoubleComplex, lwork  );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_MALLOC_PIN( rwork, double, lrwork);
            #endif
            
            /* Initialize the matrix */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            lapackf77_zlarnv( &ione, ISEED, &n2, h_B );
            magma_zmake_hpd( N, h_B, N );
            magma_zmake_hermitian( N, h_A, N );

            // ==================================================================
            // Warmup using MAGMA
            // ==================================================================
            if(opts.warmup){
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_zhegvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                               h_work, lwork,
                               #if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
                               #endif      
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_zhegvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
            }
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

            gpu_time = magma_wtime();
            magma_zhegvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo,
                           N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                           h_work, lwork,
                           #if defined(PRECISION_z) || defined(PRECISION_c)
                           rwork, lrwork,
                           #endif
                           iwork, liwork,
                           &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_zhegvdx returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zc]hegvdx routine.
                   A x = lambda B x is solved
                   and the following 3 tests computed:
                   (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                          | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                          | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
                   (2)    | S(with V) - S(w/o V) | / | S |
                   =================================================================== */
                #if defined(PRECISION_d) || defined(PRECISION_s)
                double *rwork = h_work + N*N;
                #endif
                double temp1, temp2;
                
                result[0] = 1.;
                result[0] /= lapackf77_zlanhe("1", lapack_uplo_const(opts.uplo), &N, h_A, &N, rwork);
                result[0] /= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork);
                
                if (opts.itype == 1) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
                    result[0] *= lapackf77_zlange("1", &N, &m1, h_work, &N, rwork)/N;
                }
                else if (opts.itype == 2) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                else if (opts.itype == 3) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_zhegvdx( opts.itype, MagmaNoVec, MagmaRangeI, opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2,
                               h_work, lwork,
                               #if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
                               #endif
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_zhegvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                temp1 = temp2 = 0;
                for(int j=0; j < m2; j++) {
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                result[1] = temp2 / (((double)m2)*temp1);
            }
            
            /* =====================================================================
               Print execution time
               =================================================================== */
            printf("%5d %5d   %7.2f\n",
                   (int) N, (int) m1, gpu_time);
            if ( opts.check ) {
                printf("Testing the eigenvalues and eigenvectors for correctness:\n");
                if (opts.itype == 1) {
                    printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                else if (opts.itype == 2) {
                    printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                else if (opts.itype == 3) {
                    printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                printf(    "(2)    | D(w/ Z) - D(w/o Z) | / |D|  = %8.2e   %s\n\n", result[1], (result[1] < tolulp ? "ok" : "failed"));
                status += ! (result[0] < tol && result[1] < tolulp);
            }
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_B );
            TESTING_FREE_CPU( w1  );
            TESTING_FREE_CPU( w2  );
            TESTING_FREE_CPU( iwork );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_S    );
            TESTING_FREE_PIN( h_work );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_FREE_PIN( rwork );
            #endif
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}