Beispiel #1
0
/*
 * Pointer-argument entry point for magma_ssyevd.
 *
 * Every scalar argument arrives by address; this function dereferences the
 * scalars and forwards them by value, while the array arguments
 * (a, w, work, iwork) and the info output pointer are handed through unchanged.
 *
 * NOTE(review): the magmaf_ prefix and the all-pointer signature suggest this
 * is the Fortran binding for magma_ssyevd -- confirm against the build setup.
 */
void magmaf_ssyevd(
    magma_vec_t *jobz, magma_uplo_t *uplo, magma_int_t *n,
    float *a, magma_int_t *lda,
    float *w,
    float *work, magma_int_t *lwork,
    magma_int_t *iwork, magma_int_t *liwork,
    magma_int_t *info, magma_queue_t *queue )
{
    // Unpack the by-address scalars into named values first.
    const magma_vec_t  job_mode   = *jobz;
    const magma_uplo_t triangle   = *uplo;
    const magma_int_t  order      = *n;
    const magma_int_t  leading    = *lda;
    const magma_int_t  work_len   = *lwork;
    const magma_int_t  iwork_len  = *liwork;
    magma_queue_t      exec_queue = *queue;

    // Forward to the by-value routine; info is written by the callee.
    magma_ssyevd( job_mode, triangle, order,
                  a, leading,
                  w,
                  work, work_len,
                  iwork, iwork_len,
                  info, exec_queue );
}
Beispiel #2
0
/**
 * Runs magma_ssyevd on a copy of A and returns the leading eigenpairs.
 *
 * The routine is called with jobz = MagmaVecStr and uplo = MagmaLowerStr,
 * so eigenvectors are requested and the lower triangle of A is referenced.
 *
 * @param A       Square input matrix; its data is copied into a work buffer,
 *                so A itself is not modified.
 * @param num_ev  Maximum number of eigenpairs to return.
 * @return        min(N, num_ev) entries, where entry i holds eigenvalue w[i]
 *                and the N values stored at offset i*N of the result matrix.
 *                An empty vector is returned if the workspace query or the
 *                solve reports a non-zero info code.
 */
std::vector<EigenComponent> solver_magma(const Eigen::MatrixXf& A, unsigned int num_ev)
{
	static MagmaSpectralSolver magma;

	magma_int_t N = A.rows();
	std::cout << "MAGMA Solver N=" << N << std::endl;

	magma_timestr_t start, end;
	float gpu_time;
	start = get_current_time();

	magma_int_t info = 0;

	const float *h_A = A.data();

	// Start out as null: the workspace query below receives these pointers
	// before any buffer exists, and passing indeterminate values is undefined.
	float *h_R = NULL;
	float *h_work = NULL;
	float *w1 = NULL;
	magma_int_t *iwork = NULL;

	const char *uplo = MagmaLowerStr;
	const char *jobz = MagmaVecStr;

	/* Query for workspace sizes (lwork = liwork = -1) */
	float       aux_work[1]  = { 0.0f };
	magma_int_t aux_iwork[1] = { 0 };
	std::cout << "Querying workspace size" << std::endl;
	magma_ssyevd( jobz[0], uplo[0],
	              N, h_R, N, w1,
	              aux_work,  -1,
	              aux_iwork, -1,
	              &info );
	if(info != 0) {
		// Without valid sizes nothing sensible can be allocated.
		std::cerr << "magma_ssyevd workspace query failed, info=" << info << std::endl;
		return std::vector<EigenComponent>();
	}
	magma_int_t lwork  = (magma_int_t) aux_work[0];
	magma_int_t liwork = aux_iwork[0];
	std::cout << lwork << " " << liwork << std::endl;

	std::cout << "Allocating" << std::endl;
	// NOTE(review): w1 and iwork come from magma.malloc while h_R and h_work
	// come from magma.hostmalloc; w1 is read directly below, so confirm that
	// magma.malloc returns host-accessible memory.
	w1     = magma.malloc<float>(N  );
	h_R    = magma.hostmalloc<float>(N*N);
	h_work = magma.hostmalloc<float>(lwork);
	iwork  = magma.malloc<magma_int_t>(liwork);

	std::cout << "Copying" << std::endl;
	slacpy_( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );

	std::cout << "Solving" << std::endl;
	magma_ssyevd(jobz[0], uplo[0],
	         N, h_R, N, w1,
	         h_work, lwork,
	         iwork, liwork,
	         &info);
	const bool solved = (info == 0);
	if(!solved) {
		std::cerr << "magma_ssyevd failed, info=" << info << std::endl;
	}

	std::cout << "Collecting" << std::endl;
	// save eigenvectors and eigenvalues (left empty when the solve failed)
	std::vector<EigenComponent> solution;
	if(solved) {
		solution.resize(std::min<int>(N, num_ev));
		for(unsigned int i=0; i<solution.size(); i++) {
			// Eigenvalue i belongs to the vector stored at offset i*N, so both
			// must use the same index; w1 only has N entries (0..N-1).
			solution[i].eigenvalue = w1[i];
			const float *column = h_R + i*N;
			Eigen::VectorXf ev(N);
			for(magma_int_t j=0; j<N; j++) {
				ev[j] = column[j];
			}
			solution[i].eigenvector = ev;
		}
	}

	std::cout << "Freeing" << std::endl;
	magma.free(w1);
	magma.hostfree(h_R);
	magma.hostfree(h_work);
	magma.free(iwork);

	end = get_current_time();

	gpu_time = GetTimerValue(start,end)/1000.;
	std::cout << "Time: " << gpu_time << std::endl;

	return solution;
}
Beispiel #3
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing ssyevd

   For every problem size in opts.nsize (repeated opts.niter times) this
   driver fills a random N x N matrix, times magma_ssyevd on a copy of it,
   optionally verifies the result (opts.check), and optionally times
   lapackf77_ssyevd for comparison (opts.lapack).

   Returns the number of iterations whose correctness checks failed
   (0 when everything passed or checking was disabled).
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time, cpu_time;
    // h_A keeps the original matrix; h_R is the copy handed to MAGMA.
    float *h_A, *h_R, *h_work;
    // w1: eigenvalues from the run with opts.jobz; w2: eigenvalues from the
    // MagmaNoVec check run and from the LAPACK run.
    float *w1, *w2;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork, lda, aux_iwork[1];
    magma_int_t izero    = 0;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    float result[3], eps, aux_work[1];
    eps = lapackf77_slamch( "E" );
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    // Pass/fail thresholds: opts.tolerance scaled by slamch("E") and slamch("P").
    float tol    = opts.tolerance * lapackf77_slamch("E");
    float tolulp = opts.tolerance * lapackf77_slamch("P");
    
    // The checks below need eigenvectors, so force jobz=V when checking.
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("using: jobz = %s, uplo = %s\n",
           lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo));

    printf("    N   CPU Time (sec)   GPU Time (sec)\n");
    printf("=======================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            n2  = N*N;
            lda = N;
            
            // query for workspace sizes (lwork = liwork = -1); the sizes are
            // reported through aux_work[0] and aux_iwork[0]
            magma_ssyevd( opts.jobz, opts.uplo,
                          N, NULL, lda, NULL,
                          aux_work,  -1,
                          aux_iwork, -1,
                          &info );
            lwork  = (magma_int_t) aux_work[0];
            liwork = aux_iwork[0];
            
            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( h_A,    float, N*lda );
            TESTING_MALLOC_CPU( w1,     float, N     );
            TESTING_MALLOC_CPU( w2,     float, N     );
            TESTING_MALLOC_CPU( iwork,  magma_int_t, liwork );
            
            // NOTE(review): presumably pinned (page-locked) host memory, judging
            // by the macro name -- confirm against the testing header.
            TESTING_MALLOC_PIN( h_R,    float, N*lda  );
            TESTING_MALLOC_PIN( h_work, float, lwork  );
            
            /* Initialize the matrix: n2 random values into h_A, then copy to h_R */
            lapackf77_slarnv( &ione, ISEED, &n2, h_A );
            lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
            
            /* warm up run */
            if ( opts.warmup ) {
                magma_ssyevd( opts.jobz, opts.uplo,
                              N, h_R, lda, w1,
                              h_work, lwork,
                              iwork, liwork,
                              &info );
                if (info != 0)
                    printf("magma_ssyevd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                // restore h_R from h_A so the timed run starts from the same input
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_ssyevd( opts.jobz, opts.uplo,
                          N, h_R, lda, w1,
                          h_work, lwork,
                          iwork, liwork,
                          &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_ssyevd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zcds]drvst routine.
                   A is factored as A = U S U' and the following 3 tests computed:
                   (1)    | A - U S U' | / ( |A| N )
                   (2)    | I - U'U | / ( N )
                   (3)    | S(with U) - S(w/o U) | / | S |
                   =================================================================== */
                float temp1, temp2;
                
                // tau=NULL is unused since itype=1
                // ssyt21 is expected to fill result[0] and result[1] (tests 1 and 2);
                // nothing else in this function assigns them.
                lapackf77_ssyt21( &ione, lapack_uplo_const(opts.uplo), &N, &izero,
                                  h_A, &lda,
                                  w1, h_work,
                                  h_R, &lda,
                                  h_R, &lda,
                                  NULL, h_work, &result[0] );
                
                // Test 3: recompute eigenvalues only (MagmaNoVec) from a fresh copy
                // of h_A into w2 and compare them against w1.
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
                magma_ssyevd( MagmaNoVec, opts.uplo,
                              N, h_R, lda, w2,
                              h_work, lwork,
                              iwork, liwork,
                              &info );
                if (info != 0)
                    printf("magma_ssyevd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                // temp1: largest |eigenvalue| over both runs;
                // temp2: largest difference between the two runs.
                temp1 = temp2 = 0;
                for( int j=0; j<N; j++ ) {
                    temp1 = max(temp1, fabsf(w1[j]));
                    temp1 = max(temp1, fabsf(w2[j]));
                    temp2 = max(temp2, fabsf(w1[j]-w2[j]));
                }
                result[2] = temp2 / (((float)N)*temp1);
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                // Runs directly on h_A (not a copy) and writes eigenvalues to w2;
                // this happens after the checks above, which read h_A.
                cpu_time = magma_wtime();
                lapackf77_ssyevd( lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo),
                                  &N, h_A, &lda, w2,
                                  h_work, &lwork,
                                  iwork, &liwork,
                                  &info );
                cpu_time = magma_wtime() - cpu_time;
                if (info != 0)
                    printf("lapackf77_ssyevd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                printf("%5d   %7.2f          %7.2f\n",
                       (int) N, cpu_time, gpu_time);
            }
            else {
                printf("%5d     ---            %7.2f\n",
                       (int) N, gpu_time);
            }
            
            /* =====================================================================
               Report the correctness checks (timings were printed above)
               =================================================================== */
            if ( opts.check ) {
                printf("Testing the factorization A = U S U' for correctness:\n");
                printf("(1)    | A - U S U' | / (|A| N)     = %8.2e   %s\n",   result[0]*eps, (result[0]*eps < tol ? "ok" : "failed") );
                printf("(2)    | I -   U'U  | /  N          = %8.2e   %s\n",   result[1]*eps, (result[1]*eps < tol ? "ok" : "failed") );
                printf("(3)    | S(w/ U) - S(w/o U) | / |S| = %8.2e   %s\n\n", result[2]    , (result[2]  < tolulp ? "ok" : "failed") );
                // status counts iterations in which at least one of the three checks failed
                status += ! (result[0]*eps < tol && result[1]*eps < tol && result[2] < tolulp);
            }
            
            TESTING_FREE_CPU( h_A   );
            TESTING_FREE_CPU( w1    );
            TESTING_FREE_CPU( w2    );
            TESTING_FREE_CPU( iwork );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            fflush( stdout );
        }
        // blank line between problem sizes when each size is run several times
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }
    
    TESTING_FINALIZE();
    return status;
}