Beispiel #1
0
void setauxvar(bodyptr btab, int nbody)
{
    bodyptr bp;
    vector jvec;
    real jtot, etot, r0, r1, r;

    if (streq(getparam("auxvar"), "mass"))
	for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp))
	    Aux(bp) = mass_gsp(ggsp, absv(Pos(bp)));
    else if (streq(getparam("auxvar"), "rperi"))
	for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp)) {
	    CROSSVP(jvec, Pos(bp), Vel(bp));
	    jtot = absv(jvec);
	    etot = 0.5 * dotvp(Vel(bp), Vel(bp)) + Phi(bp);
	    r0 = 0.0;
	    r1 = absv(Pos(bp));
	    r = 0.5 * (r0 + r1);
	    while ((r1 - r0) > TOL * r) {
		if (rsqrt(2 * (etot - phi_gsp(ggsp, r))) > jtot/r)
		    r1 = r;
		else
		    r0 = r;
		r = 0.5 * (r0 + r1);
	    }
	    Aux(bp) = r;
	}
    else
	error("%s: unknown auxvar option %s\n",
	      getargv0(), getparam("auxvar"));
}
Beispiel #2
0
// -------- begin of function Music::play ---------//
// <int> songId
// <int> playType   0 = non-looped, 1 = looped
int Music::play(int songId, int playType)
{
	if( !init_flag )
		return 0;

	stop();
		if( audio.wav_init_flag )
		{
			String waveFileStr(DIR_MUSIC);
			waveFileStr += music_file[songId-1];
			if( !DIR_MUSIC[0] || !m.is_file_exist(waveFileStr) || !audio.wav_init_flag )
				return 0;
			if( playType & MUSIC_PLAY_LOOPED )
                        {
                                AbsVolume absv(config.wav_music_volume,0);
                                music_channel = audio.play_loop_wav(waveFileStr, absv );
                        }
			else
                        {
                                AbsVolume absv(config.wav_music_volume,0);
                                music_channel = audio.play_long_wav(waveFileStr, absv );
                        }
			play_type = playType;
			song_id = songId;
			return music_channel >= 0;
		}
		return 0;
}
Beispiel #3
0
int radrank(const void *a, const void *b)
{
  real Ra, Rb;

  Ra = absv(Pos((bodyptr) a));
  Rb = absv(Pos((bodyptr) b));
  return (Ra < Rb ? -1 : Ra > Rb ? 1 : 0);
}
Beispiel #4
0
int len4(a, b, c, d)
    {
    absv(a); absv(b);			/* get the absolute values */
    absv(c); absv(d);			/* (component magnitudes) */
    inorder(a, b); inorder(c, d);	/* everyone has a chance to play */
    inorder(a, c); inorder(b, d);	/* (a,d) are big (winner, loser) */
    inorder(b, c);			/* playoff for 2nd and 3rd slots */
    a += (25*b + 19*c + 16*d)/60;	/* compute 4D approximate length */
/*  a +=  (5*b +  4*c +  3*d)/12;	.. only .1% worse; easy to eval  */
    a++;				/* Roundoff -> underestimation   */
    return(a);				/* omit the above one bit jitter */
    }
Beispiel #5
0
// -------- begin of function Music::play ---------//
// <int> songId
// <int> playType   0 = non-looped, 1 = looped
int Music::play(int songId, int playType)
{
	if( !init_flag )
		return 0;

	stop();

#ifdef BUNDLE
	// disable CD music
	playType &= ~MUSIC_CD_THEN_WAV & ~MUSIC_PLAY_CD;
#endif

	if( playType & MUSIC_CD_THEN_WAV )
	{
		return play(songId, playType & ~MUSIC_CD_THEN_WAV | MUSIC_PLAY_CD) 
			|| play(songId, playType & ~MUSIC_CD_THEN_WAV & ~MUSIC_PLAY_CD);
	}
	else if( playType & MUSIC_PLAY_CD )
	{
		if( audio.cd_init_flag && audio.play_cd(songId +1, config.cd_music_volume) ) // skip the first data track
		{
			play_type = playType;
			song_id = songId;
			music_channel = 1;
			return 1;
		}
		return 0;
	}
	else 
	{
		if( audio.wav_init_flag )
		{
			String waveFileStr(DIR_MUSIC);
			waveFileStr += music_file[songId-1];
			if( !DIR_MUSIC[0] || !misc.is_file_exist(waveFileStr) || !audio.wav_init_flag )
				return 0;
			if( playType & MUSIC_PLAY_LOOPED )
			{
				AbsVolume absv(config.wav_music_volume,0);
				music_channel = audio.play_loop_wav(waveFileStr, 0, absv );
			}
			else
			{
				AbsVolume absv(config.wav_music_volume,0);
				music_channel = audio.play_long_wav(waveFileStr, absv );
			}
			play_type = playType;
			song_id = songId;
			return music_channel != 0;
		}
		return 0;
	}
}
Beispiel #6
0
int
multimut (int oribase)
{
  int mutbase;
  double _uni;
  _uni = ran1 (idum);
  if (oribase == 3)
    _uni *= mutmatrix[3][2];
  else
    _uni *= mutmatrix[oribase][3];
  for (mutbase = 0; mutbase < 4; mutbase++)
    {
      //printf ("(%f,%f)\t",_uni,mutmatrix[oribase][mutbase]);
      if (_uni <= mutmatrix[oribase][mutbase])
        {
          if ((absv (mutbase - oribase)) == 2)
            numutstrans++;
          else
            numutstranv++;
          return (mutbase);
        }
    }
  printf ("error in multimut\n");
  myassert (0);
  exit (-1);
}
Beispiel #7
0
void polymodel(void)
{
  gsl_interp_accel *pmsplacc = gsl_interp_accel_alloc();
  bodyptr p;
  real rad, phi, vel, psi, vr, vp, a, E, J;
  vector rhat, vtmp, vper;

  for (p = btab; p < NthBody(btab, nbody); p = NextBody(p)) {
    rad = rad_m(xrandom(0.0, mtot));
    phi = gsl_spline_eval(pmspline, (double) rad, pmsplacc);
    vel = pick_v(phi);
    psi = pick_psi();
    vr = vel * rcos(psi);
    vp = vel * rsin(psi);
    Mass(p) = mtot / nbody;
    pickshell(rhat, NDIM, 1.0);
    MULVS(Pos(p), rhat, rad);
    pickshell(vtmp, NDIM, 1.0);
    a = dotvp(vtmp, rhat);
    MULVS(vper, rhat, - a);
    ADDV(vper, vper, vtmp);
    a = absv(vper);
    MULVS(vper, vper, vp / a);
    MULVS(Vel(p), rhat, vr);
    ADDV(Vel(p), Vel(p), vper);
    Phi(p) = phi;
    E = phi + 0.5 * rsqr(vel);
    J = rad * ABS(vp);
    Aux(p) = Kprime * rpow(phi1 - E, npol - 1.5) * rpow(J, 2 * mpol);
  }
  gsl_interp_accel_free(pmsplacc);
}
Beispiel #8
0
int main(int argc, string argv[])
{
  stream fstr, istr, ostr;
  gsprof *gsp;
  bodyptr btab = NULL, p;
  int nbody;
  real tnow, r;
  string intags[MaxBodyFields];

  initparam(argv, defv);
  layout_body(bodyfields, Precision, NDIM);
  fstr = stropen(getparam("gsp"), "r");
  get_history(fstr);
  gsp = get_gsprof(fstr);
  istr = stropen(getparam("in"), "r");
  get_history(istr);
  if (! get_snap(istr, &btab, &nbody, &tnow, intags, TRUE))
    error("%s: snapshot input failed\n", getargv0());
  if (! set_member(intags, PosTag))
    error("%s: position data missing\n", getargv0());
  if (streq(getparam("option"), "rho"))
    for (p = btab; p < NthBody(btab, nbody); p = NextBody(p))
      Aux(p) = rho_gsp(gsp, absv(Pos(p)));
  else if (streq(getparam("option"), "drho"))
    for (p = btab; p < NthBody(btab, nbody); p = NextBody(p))
      Aux(p) = drho_gsp(gsp, absv(Pos(p)));
  else if (streq(getparam("option"), "mass"))
    for (p = btab; p < NthBody(btab, nbody); p = NextBody(p))
      Aux(p) = mass_gsp(gsp, absv(Pos(p)));
  else if (streq(getparam("option"), "phi"))
    for (p = btab; p < NthBody(btab, nbody); p = NextBody(p))
      Aux(p) = phi_gsp(gsp, absv(Pos(p)));
  else 
    error("%s: unknown option %s\n", getargv0(), getparam("option"));
  if (! strnull(getparam("out"))) {
    ostr = stropen(getparam("out"), "w");
    put_history(ostr);
    put_snap(ostr, &btab, &nbody, &tnow, set_union(bodyfields, intags));
    strclose(ostr);
  }
  return (0);
}
Beispiel #9
0
// -------- begin of function Music::change_volume --------//
void Music::change_volume(int vol)
{
	if( !init_flag )
		return;

	if( is_playing() )
	{
                AbsVolume absv(vol,0);
                audio.volume_long_wav(music_channel, DsVolume(absv));
	}
}
Beispiel #10
0
void picktriad(vector x, vector y, vector z)
{
    real a;

    pickshell(x, NDIM, 1.0);
    pickshell(z, NDIM, 1.0);
    CROSSVP(y, x, z);
    a = absv(y);
    DIVVS(y, y, a);
    CROSSVP(z, x, y);
}
Beispiel #11
0
local void gspforces(bodyptr btab, int nbody, gsprof *gravgsp)
{
  bodyptr bp;
  real r, mr3i;

  for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp)) {
    r = absv(Pos(bp));
    Phi(bp) = phi_gsp(gravgsp, r);
    mr3i = mass_gsp(gravgsp, r) / rqbe(r);
    MULVS(Acc(bp), Pos(bp), -mr3i);
  }
}
Beispiel #12
0
local void hqmforces(bodyptr btab, int nbody, real M, real a, real b,
		     real tol)
{
  bodyptr bp;
  double r, mr3i, params[4], phi0, aR0, az0, abserr[3];
  static gsl_integration_workspace *wksp = NULL;
  gsl_function FPhi, F_aR, F_az;
  static double maxerr = 0.0;
  int stat[3];

  if (a == b) {					// spherical case is easy!
    for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp)) {
      r = absv(Pos(bp));
      Phi(bp) = - M / (a + r);
      mr3i = M * rsqr(r / (a + r)) / rqbe(r);
      MULVS(Acc(bp), Pos(bp), - mr3i);
    }
  } else {					// flattened case is harder
    if (wksp == NULL) {				// on first call, initialze
      wksp = gsl_integration_workspace_alloc(1000);
      gsl_set_error_handler_off();		// handle errors below
    }
    FPhi.function = &intPhi;
    F_aR.function = &int_aR;
    F_az.function = &int_az;
    FPhi.params = F_aR.params = F_az.params = params;
    a2(params) = rsqr(a);
    b2(params) = rsqr(b);
    for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp)) {
      R(params) = rsqrt(rsqr(Pos(bp)[0]) + rsqr(Pos(bp)[1]));
      z(params) = Pos(bp)[2];
      stat[0] = gsl_integration_qagiu(&FPhi, 0.0, tol, 0.0,
				      1000, wksp, &phi0, &abserr[0]);
      stat[1] = gsl_integration_qagiu(&F_aR, 0.0, tol, 0.0,
				      1000, wksp, &aR0, &abserr[1]);
      stat[2] = gsl_integration_qagiu(&F_az, 0.0, tol, 0.0,
				      1000, wksp, &az0, &abserr[2]);
      if (stat[0] || stat[1] || stat[2])	// any errors reported?
	for (int i = 0; i < 3; i++)
	  if (stat[i] != 0 && abserr[i] > maxerr) {
	    eprintf("[%s.hqmforces: warning: %s  abserr[%d] = %g]\n",
		    getprog(), gsl_strerror(stat[i]), i+1, abserr[i]);
	    maxerr = abserr[i];			// adjust reporting threshold
	  }
      Phi(bp) = - M * phi0;
      Acc(bp)[0] = - M * (Pos(bp)[0] / R(params)) * aR0;
      Acc(bp)[1] = - M * (Pos(bp)[1] / R(params)) * aR0;
      Acc(bp)[2] = - M * az0;
    }
  }
}
Beispiel #13
0
static int crossn (double *a, double *b, double *c)
{
    /* Local variables */
    register double *d1;
    register double x;

    /* Function Body */

    cross (a, b, c);
    x = absv (c);

    if (x >= 1e-30)
        for (d1 = c; d1 < c + 3; )
            *d1++ /= x;

    return (0);
}
Beispiel #14
0
// -------- begin of function Music::change_volume --------//
void Music::change_volume(int vol)
{
	if( !init_flag )
		return;

	if( is_playing() )
	{
		if( play_type & MUSIC_PLAY_CD )
		{
			audio.set_cd_volume(vol);
		}
		else
		{
			AbsVolume absv(vol,0);
			audio.volume_long_wav(music_channel, DsVolume(absv));
		}
	}
}
Beispiel #15
0
void setprofile(real *prof1, real *prof2, int nprof, real rrange[],
		bodyptr btab, int nbody)
{
  real logrmin, logrdif, logr;
  bodyptr bp;
  int j;

  logrmin = rlog10(rrange[0]);
  logrdif = rlog10(rrange[1] / rrange[0]);
  for (bp = btab; bp < NthBody(btab, nbody); bp = NextBody(bp)) {
    logr = rlog10(absv(Pos(bp)));
    j = 1 + floor(nprof * (logr - logrmin) / logrdif);
    j = MAX(j, 0);
    j = MIN(j, nprof + 1);
    prof1[j] += Mass(bp);
    prof2[j] += rsqr(Mass(bp));
  }
}
Beispiel #16
0
//  treeforce: supervise force calculation.
//  _______________________________________
local void treeforce(void) {
  bodyptr p1, p2, p;
  real r, mr3i;

  p1 = bodytab + MAX(nstatic, 0);		// set dynamic body range
  p2 = bodytab + nbody + MIN(nstatic, 0);
  for (p = bodytab; p < bodytab+nbody; p++)	// loop over all bodies
    Update(p) = (testcalc ? p1 <= p && p < p2 : TRUE);
						// flag bodies to update
  maketree(bodytab, nbody);			// construct tree structure
  gravcalc();					// compute current forces
  forcereport();				// print force statistics
#if defined(EXTGRAV)
  for (p = bodytab; p < bodytab+nbody; p++)	// loop over all bodies
    if (Update(p) && gravgsp != NULL) {		// update in extern field?
      r = absv(Pos(p));				// get distance from origin
      mr3i = - mass_gsp(gravgsp, r) / (r*r*r);
      ADDMULVS(Acc(p), Pos(p), mr3i);		// add extern acc and phi
      Phi(p) += phi_gsp(gravgsp, r);
    }
#endif
}
Beispiel #17
0
gsprof *snapgsp(bodyptr btab, int nbody, int npoint, real alpha, real beta)
{
  gsprof *gsp;
  real *rtab, *dtab, *mtab, *coef, mtot;
  int nsamp, i, j;

  if (nbody % npoint != 0)
    error("%s: npoint must divide nbody\n", getargv0());
  gsp = (gsprof *) allocate(sizeof(gsprof));
  rtab = (real *) allocate(npoint * sizeof(real));
  dtab = (real *) allocate(npoint * sizeof(real));
  mtab = (real *) allocate(npoint * sizeof(real));
  coef = (real *) allocate(3 * npoint * sizeof(real));
  qsort(btab, nbody, SizeofBody, radrank);
  nsamp = nbody / npoint;
  mtot = 0.0;
  j = 0;
  for (i = 0; i < nbody; i++) {
    mtot += Mass(NthBody(btab, i));
    if (i % nsamp == nsamp - 1) {
      rtab[j] = absv(Pos(NthBody(btab, i)));
      mtab[j] = mtot;
      j++;
    }
  }
  spline(coef, rtab, mtab, npoint);
  for (j = 0; j < npoint; j++)
    dtab[j] =
      spldif(rtab[j], rtab, mtab, coef, npoint) / (FOUR_PI * rsqr(rtab[j]));
  gsp->npoint = npoint;
  gsp->radius = rtab;
  gsp->density = dtab;
  gsp->alpha = alpha;
  gsp->beta = beta;
  gsp->mass = mtab;
  gsp->mtot = mtot;
  return (gsp);
}
Beispiel #18
0
void sphrprof(void)
{
    bodyptr *rsort;
    int skip, j, i;
    real r;

    if (nspheroid > 1) {
	rsort = (bodyptr *) allocate(nspheroid * sizeof(bodyptr));
	for (i = 0; i < nspheroid; i++)
	    rsort[i] = NthBody(spheroid, i);
	qsort(rsort, nspheroid, sizeof(bodyptr), rankrad);
        skip = (int) rceil(MAX(((real) nspheroid) / (NTAB - 1), 1.0));
	msph[0] = rsph[0] = 0.0;
	for (j = 1; j < NTAB; j++) {
	    i = skip * j - 1;
	    rsph[j] = (i < nspheroid ? absv(Pos(rsort[i])) : 1.0 + rsph[j-1]);
	    msph[j] = 0.0;
	}
	for (i = 0; i < nspheroid; i++) {
	    j = i / skip + 1;
	    if (j > NTAB-1)
		error("%s.sphrprof: table overflow: i = %d  skip = %d\n",
			  getargv0(), i, skip);
	    msph[j] = msph[j] + Mass(rsort[i]);
	}
	for (j = 1; j < NTAB; j++)
	    msph[j] += msph[j - 1];
    } else {
        for (j = 0; j < NTAB; j++) {
	    rsph[j] = 1.0 * j;
	    msph[j] = Mass(NthBody(spheroid, 0));
	}
    }
    eprintf("[%s.sphrprof: rsph = %f %f %f ... %f %f]\n", getargv0(),
	    rsph[0], rsph[1], rsph[2], rsph[NTAB-2], rsph[NTAB-1]);
    spline(mcof, rsph, msph, NTAB);
}
Beispiel #19
0
double simplify_angle_180(double angle)
{
  double mult;

  if (angle > PI) {
    mult = angle/(2.0*PI);
    mult = floor(mult);
    angle -= mult*2.0*PI;
    if (angle > PI)
      angle -= 2.0 * PI;

  } else if (angle < PI) {
    mult = angle/(-2.0*PI);
    mult = floor(mult);
    angle += mult*2.0*PI;
    if (angle < -PI)
      angle += 2.0 * PI;
  }

  if (absv(angle+PI) < ANGLE_TOLERANCE)
    angle = PI;

  return angle;
}
Beispiel #20
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing cheevd
*/
int main( int argc, char** argv) 
{
    TESTING_CUDA_INIT();

    cuFloatComplex *h_A, *h_R, *h_work;
    float *rwork, *w1, *w2;
    magma_int_t *iwork;
    float gpu_time, cpu_time;

    magma_timestr_t start, end;

    /* Matrix size */
    magma_int_t N=0, n2;
    magma_int_t size[8] = {1024,2048,3072,4032,5184,6016,7040,8064};

    magma_int_t i, info;
    magma_int_t ione     = 1, izero = 0;
    magma_int_t ISEED[4] = {0,0,0,1};

    const char *uplo = MagmaLowerStr;
    const char *jobz = MagmaVectorsStr;

    magma_int_t checkres;
    float result[3], eps = lapackf77_slamch( "E" );

    if (argc != 1){
        for(i = 1; i<argc; i++){
            if (strcmp("-N", argv[i])==0) {
                N = atoi(argv[++i]);
            }
            else if ( strcmp("-JV", argv[i]) == 0 ) {
                jobz = MagmaVectorsStr;
            }
            else if ( strcmp("-JN", argv[i]) == 0 ) {
                jobz = MagmaNoVectorsStr;
            }
        }
        if (N>0)
            printf("  testing_cheevd -N %d [-JV] [-JN]\n\n", (int) N);
        else {
            printf("\nUsage: \n");
            printf("  testing_cheevd -N %d [-JV] [-JN]\n\n", (int) N);
            exit(1);
        }
    }
    else {
        printf("\nUsage: \n");
        printf("  testing_cheevd -N %d [-JV] [-JN]\n\n", 1024);
        N = size[7];
    }

    checkres  = getenv("MAGMA_TESTINGS_CHECK") != NULL;
    if ( checkres && jobz[0] == MagmaNoVectors ) {
        printf( "Cannot check results when vectors are not computed (jobz='N')\n" );
        checkres = false;
    }

    /* Query for workspace sizes */
    cuFloatComplex aux_work[1];
    float          aux_rwork[1];
    magma_int_t     aux_iwork[1];
    magma_cheevd( jobz[0], uplo[0],
                  N, h_R, N, w1,
                  aux_work,  -1,
                  aux_rwork, -1,
                  aux_iwork, -1,
                  &info );
    magma_int_t lwork, lrwork, liwork;
    lwork  = (magma_int_t) MAGMA_C_REAL( aux_work[0] );
    lrwork = (magma_int_t) aux_rwork[0];
    liwork = aux_iwork[0];

    /* Allocate host memory for the matrix */
    TESTING_MALLOC(    h_A, cuFloatComplex, N*N );
    TESTING_MALLOC(    w1,  float         , N   );
    TESTING_MALLOC(    w2,  float         , N   );
    TESTING_HOSTALLOC( h_R, cuFloatComplex, N*N );
    TESTING_HOSTALLOC( h_work, cuFloatComplex, lwork  );
    TESTING_MALLOC(    rwork,  float,          lrwork );
    TESTING_MALLOC(    iwork,  magma_int_t,     liwork );
    
    printf("  N     CPU Time(s)    GPU Time(s) \n");
    printf("===================================\n");
    for(i=0; i<8; i++){
        if (argc==1){
            N = size[i];
        }
        n2 = N*N;

        /* Initialize the matrix */
        lapackf77_clarnv( &ione, ISEED, &n2, h_A );
        for( int i=0; i<N; i++) {
            MAGMA_C_SET2REAL( h_A[i*N+i], MAGMA_C_REAL(h_A[i*N+i]) );
        }
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );

        /* warm up run */
        magma_cheevd(jobz[0], uplo[0],
                     N, h_R, N, w1,
                     h_work, lwork, 
                     rwork, lrwork, 
                     iwork, liwork, 
                     &info);
        
        lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );

        /* query for optimal workspace sizes */
        magma_cheevd(jobz[0], uplo[0],
                     N, h_R, N, w1,
                     h_work, -1,
                     rwork,  -1,
                     iwork,  -1,
                     &info);
        int lwork_save  = lwork;
        int lrwork_save = lrwork;
        int liwork_save = liwork;
        lwork  = min( lwork,  (magma_int_t) MAGMA_C_REAL( h_work[0] ));
        lrwork = min( lrwork, (magma_int_t) rwork[0] );
        liwork = min( liwork, iwork[0] );
        //printf( "lwork %d, query %d, used %d; liwork %d, query %d, used %d\n",
        //        lwork_save,  (magma_int_t) h_work[0], lwork,
        //        liwork_save, iwork[0], liwork );

        /* ====================================================================
           Performs operation using MAGMA
           =================================================================== */
        start = get_current_time();
        magma_cheevd(jobz[0], uplo[0],
                     N, h_R, N, w1,
                     h_work, lwork,
                     rwork, lrwork,
                     iwork, liwork,
                     &info);
        end = get_current_time();

        gpu_time = GetTimerValue(start,end)/1000.;

        lwork  = lwork_save;
        lrwork = lrwork_save;
        liwork = liwork_save;
        
        if ( checkres ) {
          /* =====================================================================
             Check the results following the LAPACK's [zcds]drvst routine.
             A is factored as A = U S U' and the following 3 tests computed:
             (1)    | A - U S U' | / ( |A| N )
             (2)    | I - U'U | / ( N )
             (3)    | S(with U) - S(w/o U) | / | S |
             =================================================================== */
          float temp1, temp2;
          cuFloatComplex *tau;

          lapackf77_chet21(&ione, uplo, &N, &izero,
                           h_A, &N,
                           w1, w1,
                           h_R, &N,
                           h_R, &N,
                           tau, h_work, rwork, &result[0]);
          
          lapackf77_clacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
          magma_cheevd('N', uplo[0],
                       N, h_R, N, w2,
                       h_work, lwork,
                       rwork, lrwork,
                       iwork, liwork,
                       &info);

          temp1 = temp2 = 0;
          for(int j=0; j<N; j++){
            temp1 = max(temp1, absv(w1[j]));
            temp1 = max(temp1, absv(w2[j]));
            temp2 = max(temp2, absv(w1[j]-w2[j]));
          }
          result[2] = temp2 / temp1;
        }

        /* =====================================================================
           Performs operation using LAPACK
           =================================================================== */
        start = get_current_time();
        lapackf77_cheevd(jobz, uplo,
                         &N, h_A, &N, w2,
                         h_work, &lwork,
                         rwork, &lrwork,
                         iwork, &liwork,
                         &info);
        end = get_current_time();
        if (info < 0)
          printf("Argument %d of cheevd had an illegal value.\n", (int) -info);

        cpu_time = GetTimerValue(start,end)/1000.;

        /* =====================================================================
           Print execution time
           =================================================================== */
        printf("%5d     %6.2f         %6.2f\n",
               (int) N, cpu_time, gpu_time);
        if ( checkres ){
          printf("Testing the factorization A = U S U' for correctness:\n");
          printf("(1)    | A - U S U' | / (|A| N) = %e\n", result[0]*eps);
          printf("(2)    | I -   U'U  | /  N      = %e\n", result[1]*eps);
          printf("(3)    | S(w/ U)-S(w/o U)|/ |S| = %e\n\n", result[2]);
        }

        if (argc != 1)
            break;
    }
 
    /* Memory clean up */
    TESTING_FREE(       h_A);
    TESTING_FREE(        w1);
    TESTING_FREE(        w2);
    TESTING_FREE(     rwork);
    TESTING_FREE(     iwork);
    TESTING_HOSTFREE(h_work);
    TESTING_HOSTFREE(   h_R);

    /* Shutdown */
    TESTING_CUDA_FINALIZE();
}
Beispiel #21
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing ssygvd
*/
int main( int argc, char** argv)
{
    TESTING_INIT_MGPU();

    float *h_A, *h_Ainit, *h_B, *h_Binit, *h_work;
    #if defined(PRECISION_z) || defined(PRECISION_c)
    float *rwork;
    #endif
    float *w1, *w2, result;
    magma_int_t *iwork;
    float mgpu_time, gpu_time, cpu_time;

    /* Matrix size */
    magma_int_t N=0, n2;

    magma_int_t info;
    magma_int_t ione = 1;

    float c_zero    = MAGMA_S_ZERO;
    float c_one     = MAGMA_S_ONE;
    float c_neg_one = MAGMA_S_NEG_ONE;

    magma_int_t ISEED[4] = {0,0,0,1};

    magma_timestr_t start, end;

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    float tol    = opts.tolerance * lapackf77_slamch("E");
    float tolulp = opts.tolerance * lapackf77_slamch("P");

    char jobz = opts.jobz;
    int checkres = opts.check;

    char uplo = opts.uplo;
    magma_int_t itype = opts.itype;

    if ( checkres && jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        jobz = MagmaVec;
    }

    printf("using: nrgpu = %d, itype = %d, jobz = %c, uplo = %c, checkres = %d\n",
           (int) opts.ngpu, (int) itype, jobz, uplo, (int) checkres);

    printf("  N     M   nr GPU     MGPU Time(s) \n");
    printf("====================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            n2     = N*N;
            #if defined(PRECISION_z) || defined(PRECISION_c)
            magma_int_t lwork = 2*N + N*N;
            magma_int_t lrwork = 1 + 5*N +2*N*N;
            // MKL's ssygvd has a bug for small N - it looks like what is returned by a 
            // query (consistent with LAPACK's number above) is different from a the memory
            // requirement ckeck (that returns info -11). The lwork increase below is needed
            // to pass this check.  
            if (N<32)
                lwork = 34*32;
            #else
            magma_int_t lwork  = 1 + 6*N + 2*N*N;
            #endif
            magma_int_t liwork = 3 + 5*N;

            TESTING_MALLOC_PIN( h_A,    float, n2    );
            TESTING_MALLOC_PIN( h_B,    float, n2    );
            TESTING_MALLOC_PIN( h_work, float, lwork );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_MALLOC_PIN( rwork, float, lrwork );
            #endif

            TESTING_MALLOC_CPU( w1,    float, N );
            TESTING_MALLOC_CPU( w2,    float, N );
            TESTING_MALLOC_CPU( iwork, magma_int_t, liwork );

            printf("  N     CPU Time(s)    GPU Time(s)   MGPU Time(s) \n");
            printf("==================================================\n");

            /* Initialize the matrix */
            lapackf77_slarnv( &ione, ISEED, &n2, h_A );
            lapackf77_slarnv( &ione, ISEED, &n2, h_B );
            magma_smake_hpd( N, h_B, N );
            magma_smake_symmetric( N, h_A, N );

            if((opts.warmup)||( checkres )){
                TESTING_MALLOC_CPU( h_Ainit, float, n2 );
                TESTING_MALLOC_CPU( h_Binit, float, n2 );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_Ainit, &N );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_Binit, &N );
            }

            if(opts.warmup){

                // ==================================================================
                // Warmup using MAGMA.
                // ==================================================================
                magma_ssygvd_m( opts.ngpu, itype, jobz, uplo,
                                N, h_A, N, h_B, N, w1,
                                h_work, lwork,
                                #if defined(PRECISION_z) || defined(PRECISION_c)
                                rwork, lrwork,
                                #endif
                                iwork, liwork,
                                &info);
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_Ainit, &N, h_A, &N );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_Binit, &N, h_B, &N );
            }

            // ===================================================================
            // Performs operation using MAGMA
            // ===================================================================

            start = get_current_time();
            magma_ssygvd_m( opts.ngpu, itype, jobz, uplo,
                            N, h_A, N, h_B, N, w1,
                            h_work, lwork,
                            #if defined(PRECISION_z) || defined(PRECISION_c)
                            rwork, lrwork,
                            #endif
                            iwork, liwork,
                            &info);
            end = get_current_time();

            if(info != 0)
                printf("magma_ssygvd_m returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));

            mgpu_time = GetTimerValue(start,end)/1000.;

            if ( checkres ) {
                /* =====================================================================
                 Check the results following the LAPACK's [zc]hegvd routine.
                 A x = lambda B x is solved
                 and the following 3 tests computed:
                 (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                 | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                 | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
                 =================================================================== */

                #if defined(PRECISION_d) || defined(PRECISION_s)
                float *rwork = h_work + N*N;
                #endif

                result = 1.;
                result /= lapackf77_slansy("1",&uplo, &N, h_Ainit, &N, rwork);
                result /= lapackf77_slange("1",&N , &N, h_A, &N, rwork);

                if (itype == 1){
                    blasf77_ssymm("L", &uplo, &N, &N, &c_one, h_Ainit, &N, h_A, &N, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_sscal(&N, &w1[i], &h_A[i*N], &ione);
                    blasf77_ssymm("L", &uplo, &N, &N, &c_neg_one, h_Binit, &N, h_A, &N, &c_one, h_work, &N);
                    result *= lapackf77_slange("1", &N, &N, h_work, &N, rwork)/N;
                }
                else if (itype == 2){
                    blasf77_ssymm("L", &uplo, &N, &N, &c_one, h_Binit, &N, h_A, &N, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_sscal(&N, &w1[i], &h_A[i*N], &ione);
                    blasf77_ssymm("L", &uplo, &N, &N, &c_one, h_Ainit, &N, h_work, &N, &c_neg_one, h_A, &N);
                    result *= lapackf77_slange("1", &N, &N, h_A, &N, rwork)/N;
                }
                else if (itype == 3){
                    blasf77_ssymm("L", &uplo, &N, &N, &c_one, h_Ainit, &N, h_A, &N, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_sscal(&N, &w1[i], &h_A[i*N], &ione);
                    blasf77_ssymm("L", &uplo, &N, &N, &c_one, h_Binit, &N, h_work, &N, &c_neg_one, h_A, &N);
                    result *= lapackf77_slange("1", &N, &N, h_A, &N, rwork)/N;
                }

                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_Ainit, &N, h_A, &N );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_Binit, &N, h_B, &N );

                /* ====================================================================
                 Performs operation using MAGMA
                 =================================================================== */
                start = get_current_time();
                magma_ssygvd(itype, jobz, uplo,
                             N, h_A, N, h_B, N, w2,
                             h_work, lwork,
                             #if defined(PRECISION_z) || defined(PRECISION_c)
                             rwork, lrwork,
                             #endif
                             iwork, liwork,
                             &info);
                end = get_current_time();

                if(info != 0)
                    printf("magma_ssygvd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));

                gpu_time = GetTimerValue(start,end)/1000.;

                /* =====================================================================
                 Performs operation using LAPACK
                 =================================================================== */
                start = get_current_time();
                lapackf77_ssygvd(&itype, &jobz, &uplo,
                                 &N, h_Ainit, &N, h_Binit, &N, w2,
                                 h_work, &lwork,
                                 #if defined(PRECISION_z) || defined(PRECISION_c)
                                 rwork, &lrwork,
                                 #endif
                                 iwork, &liwork,
                                 &info);
                end = get_current_time();
                if (info != 0)
                    printf("lapackf77_ssygvd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));

                cpu_time = GetTimerValue(start,end)/1000.;

                float temp1 = 0;
                float temp2 = 0;
                for(int j=0; j<N; j++){
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                float result2 = temp2 / (((float)N)*temp1);

                /* =====================================================================
                 Print execution time
                 =================================================================== */
                printf("%5d     %6.2f         %6.2f         %6.2f\n",
                       (int) N, cpu_time, gpu_time, mgpu_time);
                printf("Testing the eigenvalues and eigenvectors for correctness:\n");
                if(itype==1)
                    printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %8.2e%s\n", result, (result < tol ? "" : "  failed") );
                else if(itype==2)
                    printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %8.2e%s\n", result, (result < tol ? "" : "  failed") );
                else if(itype==3)
                    printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %8.2e%s\n", result, (result < tol ? "" : "  failed") );

                printf(    "(3)    | D(MGPU)-D(LAPACK) |/ |D|    = %8.2e%s\n\n", result2, (result2 < tolulp ? "" : "  failed") );
            }
            else {
                printf("%5d     ------         ------         %6.2f\n",
                       (int) N, mgpu_time);
            }

            /* Memory clean up */
            TESTING_FREE_PIN( h_A    );
            TESTING_FREE_PIN( h_B    );
            TESTING_FREE_PIN( h_work );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_FREE_PIN( rwork  );
            #endif
            
            TESTING_FREE_CPU( w1    );
            TESTING_FREE_CPU( w2    );
            TESTING_FREE_CPU( iwork );

            if((opts.warmup)||( checkres )){
                TESTING_FREE_CPU( h_Ainit );
                TESTING_FREE_CPU( h_Binit );
            }
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    /* Shutdown */
    TESTING_FINALIZE_MGPU();
}
int main(int argc, char **argv) 
{

    int 	nxtarg = 0;
    int 	i;
    char	c;
    char 	tmpstring[TMPSTRINGLEN+1];
    char 	filename[TMPSTRINGLEN+1];
    char 	outfilename[TMPSTRINGLEN+1];
    FILE	*cpudsptimefile = NULL;
    FILE	*cpupostimefile = NULL;
    FILE	*possynctimefile = NULL;
    FILE	*outfile = NULL;

    u32		cpudsp1[3], cpudsp2[3]; // this assumes that each timecheck contains three numbers, the cpu time, the dsp time, and the subsequent cpu time 
    u32 	cpupos;
    u32 	lastcpupos;
    u32		dspframetime;
    u32		possynctime = 0;
    int 	ngaps = 0;  // the number of > 40 ms gaps 
    int		ncomputed = 0; // the number of times we used the computed timestamp instead of the sync edge time
    u32		tmpdiff, lastdiff, maxframetimediff = 0;

    while (++nxtarg < argc) {
	if (strcmp(argv[nxtarg], "-cd") == 0) {
	    strcpy(filename, argv[++nxtarg]);
	    /* open the file */
	    if ((cpudsptimefile = fopen(filename, "r")) 
		    == NULL) {
		fprintf(stderr, "Error opening %s for reading\n", filename);
		exit(-1);
            }
	}
	else if (strcmp(argv[nxtarg], "-cp") == 0) {
	    /* open cpu pos time file */
	    strcpy(filename, argv[++nxtarg]);
	    if ((cpupostimefile = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "Error opening %s for reading\n", filename);
		exit(-1);
            }
	}
	else if (strcmp(argv[nxtarg], "-ps") == 0) {
	    /* open pos sync times file */
	    strcpy(filename, argv[++nxtarg]);
	    if ((possynctimefile = fopen(filename, "r")) == NULL) {
		fprintf(stderr, "Error opening %s for reading\n", filename);
		exit(-1);
            }
	}
	else if (strcmp(argv[nxtarg], "-o") == 0) {
            /* open the output file */
            strcpy(outfilename, argv[++nxtarg]);
            if ((outfile = fopen(outfilename, "w")) == NULL) {
                fprintf(stderr, "Error opening %s for writing\n", outfilename);
                exit(-1);
            }
        }
	else {
	    fprintf(stderr, "Usage: nspike_postimestamp -cd cpudsptimefile -cp cpupostimefile -ps possynctimesfile -o outputfile\n"); 
            exit(1);
	}
    }

    /* check to make sure that all necessary arguments have been specified */
    if ((cpudsptimefile == NULL) || (cpupostimefile == NULL) || 
	    (possynctimefile == NULL) || (outfile == NULL)) {
	fprintf(stderr, "Usage: nspike_postimestamp -cd cpudsptimefile -cp cpupostimefile -ps possynctimesfile -o outputfile\n"); 
	exit(1);
    }

    /* write out an uncompressed header to the output file */
    fprintf(outfile, "%%%%BEGINHEADER\n");
    fprintf(outfile, "%% File type:\tBinary\n");
    fprintf(outfile, "%% Extraction type:\tdsp position frame time stamps \n");
    fprintf(outfile, "%% Fields:\t  timestamp (unsigned int)\n");
    fprintf(outfile, "%%%%ENDHEADER\n");

    /* read past the headers of the input files */

    do {
        fgets(tmpstring, TMPSTRINGLEN, cpudsptimefile);
    } while ((strncmp(tmpstring, "%%ENDHEADER", 10) != 0) &&
             (strncmp(tmpstring, "%%ENDCONFIG", 10) != 0));

    do {
        fgets(tmpstring, TMPSTRINGLEN, cpupostimefile);
    } while ((strncmp(tmpstring, "%%ENDHEADER", 10) != 0) &&
             (strncmp(tmpstring, "%%ENDCONFIG", 10) != 0));

    do {
        fgets(tmpstring, TMPSTRINGLEN, possynctimefile);
    } while ((strncmp(tmpstring, "%%ENDHEADER", 10) != 0) &&
             (strncmp(tmpstring, "%%ENDCONFIG", 10) != 0));

    if (nextvalidcpudsptime(cpudsptimefile, cpudsp1) == 0) {
	exit(-1);
    }
    cpudsp2[0] = 0;
    /* Now we go through the cpupostimefile and find the possync time that
     * corresponds to each frame */

    lastcpupos = 0;
    while (!feof(cpupostimefile)) {
	/* read in the next cpupostimefile entry */
	if (fread(&cpupos, sizeof(u32), 1, cpupostimefile) != 1) {
	    fprintf(stderr, "Error reading cpu - position frame time from cpupostimefile at offset %ld, eof = %d\n", ftell(cpupostimefile), feof(cpupostimefile));
	    break;
	}
	if (lastcpupos && (cpupos - lastcpupos > 400)) {
	    ngaps++;
	}
	lastcpupos = cpupos;
	/* make sure that the next cpudsp time is after the current cpu frame
	 * time */

	while ((cpudsp2[0] < cpupos) && nextvalidcpudsptime(cpudsptimefile,
			cpudsp2)) {
	    /* move cpudsp2 to cpudsp1 and move on */
	    memcpy(cpudsp1, cpudsp2, 3 * sizeof(u32));
	}

	/* use the closest cpudsp time to convert the current frame time to a
	 * dsp clock time */
	if (absv(cpudsp1[0] - cpupos) < absv(cpudsp2[0] - cpupos)) {
	    dspframetime = cpupos + (cpudsp1[1] - cpudsp1[0]);
	}
	else {
	    dspframetime = cpupos + (cpudsp2[1] - cpudsp2[0]);
	}
	/* initialize the first possync time so that we read in the correct
	 * first value */
	if (possynctime == 0) {
	    possynctime = dspframetime;
	}
	/* find the pos sync time closest to the dspframe time and write it out
	 * to the file */
	tmpdiff = UINT_MAX;
	/* check to see if the possynctime is much greater than the current
	 * dspframetime as can occur when a time offset has been added */
	if (possynctime < dspframetime + 10000) {
	    do {
		lastdiff = tmpdiff;
		/* get the timestamp */
		if (fread(&possynctime, sizeof(u32), 1, possynctimefile) != 1) {
		    fprintf(stderr, "Error reading position frame sync time from possynctime file\n");
		    exit(-1);
		}
		/* get the transition */
		if (fread(&c, sizeof(char), 1, possynctimefile) != 1) {
		    fprintf(stderr, "Error reading position frame sync type from possynctime file\n");
		    exit(-1);
		}
	    } while (((tmpdiff = abs(possynctime - dspframetime)) > 50) && 
		 (tmpdiff < lastdiff));
	}
	if (tmpdiff >= 1000) {
	    /* we should use the computed time instead of the read in time, as
	     * we may have missed the edge of a frame */
	    fwrite(&dspframetime, sizeof(u32), 1, outfile);
	    ncomputed++;
	}
	else {
	    fwrite(&possynctime, sizeof(u32), 1, outfile);
	    if (tmpdiff > maxframetimediff) {
		maxframetimediff = tmpdiff;
	    }
	}
    }
    fprintf(stderr, "Timestamp file created, %d gaps > 40 ms\nLargest discrepancy between computed and actual dsp frame times = %2.1f ms\nNumber of frames for which the computed timestamp was used = %d\n", ngaps, 
	    (float) maxframetimediff / 10, ncomputed);

    fclose(outfile);
    fclose(possynctimefile);
    fclose(cpudsptimefile);
    fclose(cpupostimefile);
}
Beispiel #23
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dsyevd
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time, cpu_time;
    double *h_A, *h_R, *d_R, *h_work;
    double *w1, *w2;
    magma_int_t *iwork;
    magma_int_t N, n2, info, lwork, liwork, lda, ldda, aux_iwork[1];
    magma_int_t izero    = 0;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    double result[3], eps, aux_work[1];
    eps = lapackf77_dlamch( "E" );

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    double tol    = opts.tolerance * lapackf77_dlamch("E");
    double tolulp = opts.tolerance * lapackf77_dlamch("P");
    
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("    N   CPU Time (sec)   GPU Time (sec)\n");
    printf("=======================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            n2   = N*N;
            lda  = N;
            ldda = ((N + 31)/32)*32;
            
            // query for workspace sizes
            magma_dsyevd_gpu( opts.jobz, opts.uplo,
                              N, NULL, ldda, NULL,
                              NULL, lda,
                              aux_work,  -1,
                              aux_iwork, -1,
                              &info );
            lwork  = (magma_int_t) aux_work[0];
            liwork = aux_iwork[0];
            
            /* Allocate host memory for the matrix */
            TESTING_MALLOC_CPU( h_A,    double,      N*lda  );
            TESTING_MALLOC_CPU( w1,     double,      N      );
            TESTING_MALLOC_CPU( w2,     double,      N      );
            TESTING_MALLOC_CPU( iwork,  magma_int_t, liwork );
            
            TESTING_MALLOC_PIN( h_R,    double,      N*lda  );
            TESTING_MALLOC_PIN( h_work, double,      lwork  );
            
            TESTING_MALLOC_DEV( d_R,    double,      N*ldda );
            
            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            magma_dsetmatrix( N, N, h_A, lda, d_R, ldda );
            
            /* warm up run */
            if ( opts.warmup ) {
                magma_dsyevd_gpu( opts.jobz, opts.uplo,
                                  N, d_R, ldda, w1,
                                  h_R, lda,
                                  h_work, lwork,
                                  iwork, liwork,
                                  &info );
                if (info != 0)
                    printf("magma_dsyevd_gpu returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                magma_dsetmatrix( N, N, h_A, lda, d_R, ldda );
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_dsyevd_gpu( opts.jobz, opts.uplo,
                              N, d_R, ldda, w1,
                              h_R, lda,
                              h_work, lwork,
                              iwork, liwork,
                              &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_dsyevd_gpu returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zcds]drvst routine.
                   A is factored as A = U S U' and the following 3 tests computed:
                   (1)    | A - U S U' | / ( |A| N )
                   (2)    | I - U'U | / ( N )
                   (3)    | S(with U) - S(w/o U) | / | S |
                   =================================================================== */
                double temp1, temp2;
                
                // tau=NULL is unused since itype=1
                magma_dgetmatrix( N, N, d_R, ldda, h_R, lda );
                lapackf77_dsyt21( &ione, &opts.uplo, &N, &izero,
                                  h_A, &lda,
                                  w1, h_work,
                                  h_R, &lda,
                                  h_R, &lda,
                                  NULL, h_work, &result[0] );
                
                magma_dsetmatrix( N, N, h_A, lda, d_R, ldda );
                magma_dsyevd_gpu( MagmaNoVec, opts.uplo,
                                  N, d_R, ldda, w2,
                                  h_R, lda,
                                  h_work, lwork,
                                  iwork, liwork,
                                  &info );
                if (info != 0)
                    printf("magma_dsyevd_gpu returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                temp1 = temp2 = 0;
                for( int j=0; j<N; j++ ) {
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                result[2] = temp2 / (((double)N)*temp1);
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_dsyevd( &opts.jobz, &opts.uplo,
                                  &N, h_A, &lda, w2,
                                  h_work, &lwork,
                                  iwork, &liwork,
                                  &info );
                cpu_time = magma_wtime() - cpu_time;
                if (info != 0)
                    printf("lapackf77_dsyevd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                printf("%5d   %7.2f          %7.2f\n",
                       (int) N, cpu_time, gpu_time);
            }
            else {
                printf("%5d     ---            %7.2f\n",
                       (int) N, gpu_time);
            }
            
            /* =====================================================================
               Print execution time
               =================================================================== */
            if ( opts.check ) {
                printf("Testing the factorization A = U S U' for correctness:\n");
                printf("(1)    | A - U S U' | / (|A| N)     = %8.2e%s\n",   result[0]*eps, (result[0]*eps < tol ? "" : "  failed") );
                printf("(2)    | I -   U'U  | /  N          = %8.2e%s\n",   result[1]*eps, (result[1]*eps < tol ? "" : "  failed") );
                printf("(3)    | S(w/ U) - S(w/o U) | / |S| = %8.2e%s\n\n", result[2]    , (result[2]  < tolulp ? "" : "  failed") );
            }
            
            TESTING_FREE_CPU( h_A   );
            TESTING_FREE_CPU( w1    );
            TESTING_FREE_CPU( w2    );
            TESTING_FREE_CPU( iwork );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            
            TESTING_FREE_DEV( d_R );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }
    
    TESTING_FINALIZE();
    return 0;
}
Beispiel #24
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zhegvd
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time, cpu_time;
    magmaDoubleComplex *h_A, *h_R, *h_B, *h_S, *h_work;
    double *rwork, *w1, *w2;
    double result[4] = {0};
    magma_int_t *iwork;
    magma_int_t N, n2, info, nb, lwork, liwork, lda, lrwork;
    magmaDoubleComplex c_zero    = MAGMA_Z_ZERO;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    double d_one         =  1.;
    double d_neg_one     = -1.;
    //double d_ten         = 10.;
    //magma_int_t izero    = 0;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    double tol    = opts.tolerance * lapackf77_dlamch("E");
    double tolulp = opts.tolerance * lapackf77_dlamch("P");
    
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("using: itype = %d, jobz = %s, uplo = %s\n",
           (int) opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo));

    printf("    N   CPU Time (sec)   GPU Time(sec)\n");
    printf("======================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            n2     = N*lda;
            nb     = magma_get_zhetrd_nb(N);
            lwork  = 2*N*nb + N*N;
            lrwork = 1 + 5*N +2*N*N;
            liwork = 3 + 5*N;

            TESTING_MALLOC_CPU( h_A,    magmaDoubleComplex,  n2     );
            TESTING_MALLOC_CPU( h_B,    magmaDoubleComplex,  n2     );
            TESTING_MALLOC_CPU( w1,     double,              N      );
            TESTING_MALLOC_CPU( w2,     double,              N      );
            TESTING_MALLOC_CPU( rwork,  double,              lrwork );
            TESTING_MALLOC_CPU( iwork,  magma_int_t,         liwork );
            
            TESTING_MALLOC_PIN( h_R,    magmaDoubleComplex,  n2     );
            TESTING_MALLOC_PIN( h_S,    magmaDoubleComplex,  n2     );
            TESTING_MALLOC_PIN( h_work, magmaDoubleComplex,  lwork  );
            
            /* Initialize the matrix */
            lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
            //lapackf77_zlatms( &N, &N, "U", ISEED, "P", w1, &five, &d_ten,
            //                 &d_one, &N, &N, lapack_uplo_const(opts.uplo), h_B, &lda, h_work, &info);
            //lapackf77_zlaset( "A", &N, &N, &c_zero, &c_one, h_B, &lda);
            lapackf77_zlarnv( &ione, ISEED, &n2, h_B );
            magma_zmake_hpd( N, h_B, lda );
            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
            lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &lda, h_S, &lda );
            
            /* warmup */
            if ( opts.warmup ) {
                magma_zhegvd( opts.itype, opts.jobz, opts.uplo,
                              N, h_R, lda, h_S, lda, w1,
                              h_work, lwork,
                              rwork, lrwork,
                              iwork, liwork,
                              &info );
                if (info != 0)
                    printf("magma_zhegvd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &lda, h_S, &lda );
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_zhegvd( opts.itype, opts.jobz, opts.uplo,
                          N, h_R, lda, h_S, lda, w1,
                          h_work, lwork,
                          rwork, lrwork,
                          iwork, liwork,
                          &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_zhegvd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zc]hegvd routine.
                   A x = lambda B x is solved
                   and the following 3 tests computed:
                   (1)    | A Z - B Z D | / ( |A||Z| N )   (itype = 1)
                          | A B Z - Z D | / ( |A||Z| N )   (itype = 2)
                          | B A Z - Z D | / ( |A||Z| N )   (itype = 3)
                   (2)    | I - V V' B | / ( N )           (itype = 1,2)
                          | B - V V' | / ( |B| N )         (itype = 3)
                   (3)    | S(with V) - S(w/o V) | / | S |
                   =================================================================== */
                double temp1, temp2;
                //magmaDoubleComplex *tau;
                
                if ( opts.itype == 1 || opts.itype == 2 ) {
                    lapackf77_zlaset( "A", &N, &N, &c_zero, &c_one, h_S, &lda);
                    blasf77_zgemm("N", "C", &N, &N, &N, &c_one, h_R, &lda, h_R, &lda, &c_zero, h_work, &N);
                    blasf77_zhemm("R", lapack_uplo_const(opts.uplo), &N, &N, &c_neg_one, h_B, &lda, h_work, &N, &c_one, h_S, &lda);
                    result[1] = lapackf77_zlange("1", &N, &N, h_S, &lda, rwork) / N;
                }
                else if ( opts.itype == 3 ) {
                    lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &lda, h_S, &lda);
                    blasf77_zherk(lapack_uplo_const(opts.uplo), "N", &N, &N, &d_neg_one, h_R, &lda, &d_one, h_S, &lda);
                    result[1] = lapackf77_zlanhe("1", lapack_uplo_const(opts.uplo), &N, h_S, &lda, rwork) / N
                              / lapackf77_zlanhe("1", lapack_uplo_const(opts.uplo), &N, h_B, &lda, rwork);
                }
                
                result[0] = 1.;
                result[0] /= lapackf77_zlanhe("1", lapack_uplo_const(opts.uplo), &N, h_A, &lda, rwork);
                result[0] /= lapackf77_zlange("1", &N, &N, h_R, &lda, rwork);
                
                if ( opts.itype == 1 ) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_A, &lda, h_R, &lda, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_neg_one, h_B, &lda, h_R, &lda, &c_one, h_work, &N);
                    result[0] *= lapackf77_zlange("1", &N, &N, h_work, &lda, rwork)/N;
                }
                else if ( opts.itype == 2 ) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_B, &lda, h_R, &lda, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_A, &lda, h_work, &N, &c_neg_one, h_R, &lda);
                    result[0] *= lapackf77_zlange("1", &N, &N, h_R, &lda, rwork)/N;
                }
                else if ( opts.itype == 3 ) {
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_A, &lda, h_R, &lda, &c_zero, h_work, &N);
                    for(int i=0; i<N; ++i)
                        blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_zhemm("L", lapack_uplo_const(opts.uplo), &N, &N, &c_one, h_B, &lda, h_work, &N, &c_neg_one, h_R, &lda);
                    result[0] *= lapackf77_zlange("1", &N, &N, h_R, &lda, rwork)/N;
                }
                
                /*
                lapackf77_zhet21( &ione, lapack_uplo_const(opts.uplo), &N, &izero,
                                  h_A, &lda,
                                  w1, w1,
                                  h_R, &lda,
                                  h_R, &lda,
                                  tau, h_work, rwork, &result[0] );
                */
                
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &lda, h_R, &lda );
                lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &lda, h_S, &lda );
                
                magma_zhegvd( opts.itype, MagmaNoVec, opts.uplo,
                              N, h_R, lda, h_S, lda, w2,
                              h_work, lwork,
                              rwork, lrwork,
                              iwork, liwork,
                              &info );
                if (info != 0)
                    printf("magma_zhegvd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                temp1 = temp2 = 0;
                for(int j=0; j<N; j++) {
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                result[2] = temp2 / (((double)N)*temp1);
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_zhegvd( &opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo),
                                  &N, h_A, &lda, h_B, &lda, w2,
                                  h_work, &lwork,
                                  rwork, &lrwork,
                                  iwork, &liwork,
                                  &info );
                cpu_time = magma_wtime() - cpu_time;
                if (info != 0)
                    printf("lapackf77_zhegvd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                printf("%5d     %7.2f         %7.2f\n",
                       (int) N, cpu_time, gpu_time);
            }
            else {
                printf("%5d       ---           %7.2f\n",
                       (int) N, gpu_time);
            }
            
            /* =====================================================================
               Print execution time
               =================================================================== */
            if ( opts.check ) {
                printf("Testing the eigenvalues and eigenvectors for correctness:\n");
                if ( opts.itype==1 ) {
                    printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed") );
                }
                else if ( opts.itype==2 ) {
                    printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed") );
                }
                else if ( opts.itype==3 ) {
                    printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed") );
                }
                if ( opts.itype==1 || opts.itype==2 ) {
                    printf("(2)    | I -   Z Z' B | /  N         = %8.2e   %s\n",   result[1], (result[1] < tol    ? "ok" : "failed") );
                }
                else {
                    printf("(2)    | B -  Z Z' | / (|B| N)       = %8.2e   %s\n",   result[1], (result[1] < tol    ? "ok" : "failed") );
                }
                printf(    "(3)    | D(w/ Z) - D(w/o Z) | / |D|  = %8.2e   %s\n\n", result[2], (result[2] < tolulp ? "ok" : "failed") );
                status += ! (result[0] < tol && result[1] < tol && result[2] < tolulp);
            }
            
            TESTING_FREE_CPU( h_A    );
            TESTING_FREE_CPU( h_B    );
            TESTING_FREE_CPU( w1     );
            TESTING_FREE_CPU( w2     );
            TESTING_FREE_CPU( rwork  );
            TESTING_FREE_CPU( iwork  );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_S    );
            TESTING_FREE_PIN( h_work );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }
    
    TESTING_FINALIZE();
    return status;
}
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zhegvdx
*/
int main( int argc, char** argv)
{

//#define USE_MGPU
#ifdef USE_MGPU
    TESTING_CUDA_INIT_MGPU();
#else
    TESTING_CUDA_INIT();
#endif
    magma_int_t nrgpu =1;

    cuDoubleComplex *h_A, *h_R, *h_B, *h_S, *h_work;
    double *rwork, *w1, *w2;
    magma_int_t *iwork;
    double gpu_time, cpu_time;

    magma_timestr_t start, end;

    /* Matrix size */
    magma_int_t N=0, n2;
    magma_int_t size[4] = {1024,2048,4100,6001};

    magma_int_t i, itype, info;
    magma_int_t ione = 1, izero = 0;
    magma_int_t five = 5;

    cuDoubleComplex c_zero    = MAGMA_Z_ZERO;
    cuDoubleComplex c_one     = MAGMA_Z_ONE;
    cuDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;

    double d_one     =  1.;
    double d_neg_one = -1.;
    double d_ten     = 10.;
    magma_int_t ISEED[4] = {0,0,0,1};

    magma_int_t il,iu,m1,m2;
    double vl,vu;

    double fraction_ev = 0;

    //const char *uplo = MagmaLowerStr;
    char *uplo = (char*)MagmaLowerStr;
    //char *uplo = (char*)MagmaUpperStr;
    char *jobz = (char*)MagmaVectorsStr;
    char range = 'A';
    itype = 1;

    magma_int_t checkres;
    double result[2];

    int flagN = 0;

    if (argc != 1){
        for(i = 1; i<argc; i++){
            if (strcmp("-N", argv[i])==0){
                N = atoi(argv[++i]);
                if (N>0){
                   printf("  testing_zhegvdx -N %d\n\n", (int) N);
                   flagN=1;
                }
                else {
                   printf("\nUsage: \n");
                   printf("  testing_zhegvdx -N %d\n\n", (int) N);
                   exit(1);
                }
            }
            if (strcmp("-ngpu", argv[i])==0){
                nrgpu = atoi(argv[++i]);
                if (nrgpu>0){
                   printf("  testing_zhegvdx -ngpu %d\n\n", (int) nrgpu);
                }
                else {
                   printf("\nUsage: \n");
                   printf("  testing_zhegvdx -ngpu %d\n\n", (int) nrgpu);
                   exit(1);
                }
            }
            if (strcmp("-itype", argv[i])==0){
                itype = atoi(argv[++i]);
                if (itype>0 && itype <= 3){
                   printf("  testing_zhegvdx -itype %d\n\n", (int) itype);
                }
                else {
                   printf("\nUsage: \n");
                   printf("  testing_zhegvdx -itype %d\n\n", (int) itype);
                   exit(1);
                }
            }
            if (strcmp("-FE", argv[i])==0){
                fraction_ev = atof(argv[++i]);
                if (fraction_ev > 0 && fraction_ev <= 1){
                    printf("  testing_zhegvdx -FE %f\n\n", fraction_ev);
                }
                else {
                    fraction_ev = 0;
                }
            }
            if (strcmp("-L", argv[i])==0){
              uplo = (char*)MagmaLowerStr;
              printf("  testing_zhegvdx -L");
            }
            if (strcmp("-U", argv[i])==0){
              uplo = (char*)MagmaUpperStr;
              printf("  testing_zhegvdx -U");
            }

        }

    } else {
        printf("\nUsage: \n");
        printf("  testing_zhegvdx -L/U -N %d -itype %d\n\n", 1024, 1);
    }

    if(!flagN)
        N = size[3];

    checkres  = getenv("MAGMA_TESTINGS_CHECK") != NULL;

    n2  = N * N;

    /* Allocate host memory for the matrix */
    TESTING_MALLOC(   h_A, cuDoubleComplex, n2);
    TESTING_MALLOC(   h_B, cuDoubleComplex, n2);
    TESTING_MALLOC(    w1, double         ,  N);
    TESTING_MALLOC(    w2, double         ,  N);
    TESTING_HOSTALLOC(h_R, cuDoubleComplex, n2);
    TESTING_HOSTALLOC(h_S, cuDoubleComplex, n2);

    magma_int_t nb = magma_get_zhetrd_nb(N);
    magma_int_t lwork = magma_zbulge_get_lq2(N) + 2*N + N*N;
    magma_int_t lrwork = 1 + 5*N +2*N*N;
    magma_int_t liwork = 3 + 5*N;

    TESTING_HOSTALLOC(h_work, cuDoubleComplex,  lwork);
    TESTING_HOSTALLOC( rwork,          double, lrwork);
    TESTING_MALLOC(    iwork,     magma_int_t, liwork);

    printf("  N     M     GPU Time(s) \n");
    printf("==========================\n");
    for(i=0; i<4; i++){
        if (!flagN){
            N = size[i];
            n2 = N*N;
        }
        if (fraction_ev == 0){
            il = N / 10;
            iu = N / 5+il;
        }
        else {
            il = 1;
            iu = (int)(fraction_ev*N);
            if (iu < 1) iu = 1;
        }

        /* Initialize the matrix */
        lapackf77_zlarnv( &ione, ISEED, &n2, h_A );
        //lapackf77_zlatms( &N, &N, "U", ISEED, "P", w1, &five, &d_ten,
        //                 &d_one, &N, &N, uplo, h_B, &N, h_work, &info);
        //lapackf77_zlaset( "A", &N, &N, &c_zero, &c_one, h_B, &N);
        lapackf77_zlarnv( &ione, ISEED, &n2, h_B );
        /* increase the diagonal */
        {
          magma_int_t i, j;
          for(i=0; i<N; i++) {
            MAGMA_Z_SET2REAL( h_B[i*N+i], ( MAGMA_Z_REAL(h_B[i*N+i]) + 1.*N ) );
          }
        }
        lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
        lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

#ifdef USE_MGPU
        magma_zhegvdx_2stage_m(nrgpu, itype, jobz[0], range, uplo[0],
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                               h_work, lwork,
                               rwork, lrwork,
                               iwork, liwork,
                               &info);
#else
        magma_zhegvdx_2stage(itype, jobz[0], range, uplo[0],
                             N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                             h_work, lwork,
                             rwork, lrwork,
                             iwork, liwork,
                             &info);
#endif

        lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
        lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );


        /* ====================================================================
           Performs operation using MAGMA
           =================================================================== */
        start = get_current_time();
#ifdef USE_MGPU
        magma_zhegvdx_2stage_m(nrgpu, itype, jobz[0], range, uplo[0],
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                               h_work, lwork,
                               rwork, lrwork,
                               iwork, liwork,
                               &info);
#else
        magma_zhegvdx_2stage(itype, jobz[0], range, uplo[0],
                             N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                             h_work, lwork,
                             rwork, lrwork,
                             iwork, liwork,
                             &info);
#endif
        end = get_current_time();

        gpu_time = GetTimerValue(start,end)/1000.;

        if ( checkres ) {
          /* =====================================================================
             Check the results following the LAPACK's [zc]hegvdx routine.
             A x = lambda B x is solved
             and the following 3 tests computed:
             (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                    | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                    | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
             (2)    | S(with V) - S(w/o V) | / | S |
             =================================================================== */
          double temp1, temp2;
          cuDoubleComplex *tau;

          result[0] = 1.;
          result[0] /= lapackf77_zlanhe("1",uplo, &N, h_A, &N, rwork);
          result[0] /= lapackf77_zlange("1",&N , &m1, h_R, &N, rwork);

          if (itype == 1){
            blasf77_zhemm("L", uplo, &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<m1; ++i)
              blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_zhemm("L", uplo, &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
            result[0] *= lapackf77_zlange("1", &N, &m1, h_work, &N, rwork)/N;
          }
          else if (itype == 2){
            blasf77_zhemm("L", uplo, &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<m1; ++i)
              blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_zhemm("L", uplo, &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
            result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N;
          }
          else if (itype == 3){
            blasf77_zhemm("L", uplo, &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
            for(int i=0; i<m1; ++i)
              blasf77_zdscal(&N, &w1[i], &h_R[i*N], &ione);
            blasf77_zhemm("L", uplo, &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
            result[0] *= lapackf77_zlange("1", &N, &m1, h_R, &N, rwork)/N;
          }


          lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
          lapackf77_zlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

          magma_zhegvdx(itype, 'N', range, uplo[0],
                       N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2,
                       h_work, lwork,
                       rwork, lrwork,
                       iwork, liwork,
                       &info);

          temp1 = temp2 = 0;
          for(int j=0; j<m2; j++){
            temp1 = max(temp1, absv(w1[j]));
            temp1 = max(temp1, absv(w2[j]));
            temp2 = max(temp2, absv(w1[j]-w2[j]));
          }
          result[1] = temp2 / temp1;
        }


        /* =====================================================================
           Print execution time
           =================================================================== */
        printf("%5d %5d     %6.2f\n",
               (int) N, (int) m1, gpu_time);
        if ( checkres ){
          printf("Testing the eigenvalues and eigenvectors for correctness:\n");
          if(itype==1)
             printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %e\n", result[0]);
          else if(itype==2)
             printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %e\n", result[0]);
          else if(itype==3)
             printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %e\n", result[0]);

          printf("(2)    | D(w/ Z)-D(w/o Z)|/ |D| = %e\n\n", result[1]);
        }

        if (flagN)
            break;
    }

    cudaSetDevice(0);
    /* Memory clean up */
    TESTING_FREE(       h_A);
    TESTING_FREE(       h_B);
    TESTING_FREE(        w1);
    TESTING_FREE(        w2);
    TESTING_HOSTFREE( rwork);
    TESTING_FREE(     iwork);
    TESTING_HOSTFREE(h_work);
    TESTING_HOSTFREE(   h_R);
    TESTING_HOSTFREE(   h_S);

    /* Shutdown */
#ifdef USE_MGPU
    TESTING_CUDA_FINALIZE_MGPU();
#else
     TESTING_CUDA_FINALIZE();
#endif
}
Beispiel #26
0
double line_segment_distance(double seg1_a[3], double seg1_b[3],
			   double seg2_a[3], double seg2_b[3])
{
  double sc, sd, sn, tc, td, tn;
  double uu, uv, vv, uw, vw, det;
  double u[3], v[3], w[3];

  vec_subt(u, seg1_b, seg1_a);
  vec_subt(v, seg2_b, seg2_a);
  vec_subt(w, seg1_a, seg2_a);

  uu = vec_dot(u, u);
  uv = vec_dot(u, v);
  vv = vec_dot(v, v);
  uw = vec_dot(u, w);
  vw = vec_dot(v, w);
  
  det = uu*vv - sqr(uv);
  sd = det;
  td = det;

  if (det < ALMOST_ZERO) {
    sn = 0.0;
    sd = 1.0;
    tn = vw;
    td = vv;
  } else {
    sn = uv*vw - vv*uw;
    tn = uu*vw - uv*uw;
    if (sn < 0.0) {
      sn = 0.0;
      tn = vw;
      td = vv;
    } else if (sn > sd) {
      sn = sd;
      tn = vw + uv;
      td = vv;
    }
  }

  if (tn < 0.0) {
    tn = 0.0;
    if (-uw < 0.0) {
      sn = 0.0;
    } else if (-uw > uu) {
      sn = sd;
    } else {
      sn = -uw;
      sd = uu;
    }
  } else if (tn > td) {
    tn = td;
    if (uv-uw < 0.0) {
      sn = 0.0;
    } else if (uv-uw > uu) {
      sn = sd;
    } else {
      sn = uv-uw;
      sd = uu;
    }
  }

  sc = (absv(sn) < ALMOST_ZERO ? 0.0 : sn / sd);
  tc = (absv(tn) < ALMOST_ZERO ? 0.0 : tn / td);

  vs_mult(u, sc);
  vs_mult(v, tc);
  vec_plus(w, w, u);
  vec_subt(w, w, v);

  return vec_dot(w, w);
}
Beispiel #27
0
void igrfmodel(VEC *bv, double *geo, double *Nmax_loc)
{
    /* Local variables */
    register double *d1, *d2;
    int imax, nmax, N;
    double  f, h[NCOEF]; /* KV */
    int i, k, m;
    double  s, x, y, z;
    int ihmax, ih, il;
    double  xi[3], rq;
    int ihm, ilm;
    double  srq;

    srq=absv(geo); //square root of position vector magnitude
    rq = srq*srq; //magnitude of vector
    
    N = (int) *Nmax_loc; // store the max order value in a local variable
    if (rq < .8) { // 0.8 * earth radius is the limit to determine the altitude to be above or below earth surface
      //printf ("igrf call below surface !!!\n");
    }
    
	
	// number of harmonics depends on the distance from the earth 
    rq = 1. / rq;
	//TODO static INLINE arm_status arm_sqrt_f32
    srq = sqrt(rq);
    if (rq < 0.25)
	nmax = (N - 3) * 4.0 * rq + 3;
    else
	nmax = N;
    
    
    //what is xi?

    for (d1 = xi, d2 = geo; d1 < xi + 3; )
	*d1++ = *d2++ * rq;//position unit vector
    
    ihmax = nmax * nmax; //total number of COEFFS including g and h coeffs
    imax = nmax + nmax - 2; //max index for m?
    il = ihmax + nmax + nmax;//max index for ?

    d1 = h + ihmax;	//max index address for h[NCOEF]
    d2 = Gh + ihmax; //max index address for Gh[NCOEF]
    for ( ; d1 <= h + il; )
	*d1++ = *d2++;//last 26 entries of h and Gh are made equal

    for (k = 0; k < 3; k += 2) {//runs only 2 times ... for 0 and 2
	i = imax;
	ih = ihmax;
	while (i >= k) {
	    il = ih - i - 1;
	    f = 2. / (double) (i - k + 2);
	    x = xi[0] * f;
	    y = xi[1] * f;
	    z = xi[2] * (f + f);
	    i += -2;
	    if (i >= 2) {
		for (m = 3; m <= i + 1; m += 2) {
		    ihm = ih + m;
		    ilm = il + m;
		    h[ilm+1] = Gh[ilm+1]+z*h[ihm+1]+x*(h[ihm+3]-h[ihm-1])-y*(h[ihm+2]+h[ihm-2]);
		    h[ilm] = Gh[ilm]+z*h[ihm]+x*(h[ihm+2]-h[ihm-2])+y*(h[ihm+3]+h[ihm-1]);
		}
		h[il+2] = Gh[il+2]+z*h[ih+2]+x*h[ih+4]-y*(h[ih+3]+h[ih]);
		h[il+1] = Gh[il+1]+z*h[ih+1]+y*h[ih+4]+x*(h[ih+3]-h[ih]);
	    } else if (i == 0) {
		h[il + 2] = Gh[il+2]+z*h[ih+2]+x*h[ih+4]-y*(h[ih+3]+h[ih]);
		h[il+1] = Gh[il+1]+z*h[ih+1]+y*h[ih+4]+x*(h[ih+3]-h[ih]);
	    }
	    h[il] = Gh[il]+z*h[ih]+(x*h[ih+1]+y*h[ih+2])*2.;
	    ih = il;
	}
    }

    s = h[0]*.5+(h[1]*xi[2]+h[2]*xi[0]+h[3]*xi[1])*2.;
    f = (rq+rq)*srq;
    x = f*(h[2]-s*(*(geo+0)))*1e-9;
    y = f*(h[3]-s*(*(geo+1)))*1e-9;
    z = f*(h[1]-s*(*(geo+2)))*1e-9;

    bv->ve[0] = x;
    bv->ve[1] = y;
    bv->ve[2] = z;
    /* *(bv+3) = sqrt(x*x+y*y+z*z); */ 	

}
Beispiel #28
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dsygvdx
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time /*cpu_time*/;
    double *h_A, *h_R, *h_B, *h_S, *h_work;
    double *w1, *w2, vl=0, vu=0;
    double result[2] = {0};
    magma_int_t *iwork;
    magma_int_t N, n2, info, il, iu, m1, m2, nb, lwork, liwork;
    double c_zero    = MAGMA_D_ZERO;
    double c_one     = MAGMA_D_ONE;
    double c_neg_one = MAGMA_D_NEG_ONE;
    #if defined(PRECISION_z) || defined(PRECISION_c)
    double *rwork;
    magma_int_t lrwork;
    #endif
    //double d_one         =  1.;
    //double d_ten         = 10.;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    magma_opts opts;
    parse_opts( argc, argv, &opts );

    double tol    = opts.tolerance * lapackf77_dlamch("E");
    double tolulp = opts.tolerance * lapackf77_dlamch("P");
    
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("using: itype = %d, jobz = %s, uplo = %s, check = %d, fraction = %6.4f\n",
           (int) opts.itype, lapack_vec_const(opts.jobz), lapack_uplo_const(opts.uplo),
           (int) opts.check, opts.fraction);

    printf("    N     M   GPU Time (sec)\n");
    printf("============================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            n2     = N*N;
            nb     = magma_get_dsytrd_nb(N);
            #if defined(PRECISION_z) || defined(PRECISION_c)
            lwork  = 2*N*nb + N*N;
            lrwork = 1 + 5*N +2*N*N;
            #else
            lwork  = 1 + 6*N*nb + 2* N*N;
            #endif
            liwork = 3 + 5*N;

            if ( opts.fraction == 0 ) {
                il = N / 10;
                iu = N / 5+il;
            }
            else {
                il = 1;
                iu = (int) (opts.fraction*N);
                if (iu < 1) iu = 1;
            }

            TESTING_MALLOC_CPU( h_A,    double, n2     );
            TESTING_MALLOC_CPU( h_B,    double, n2     );
            TESTING_MALLOC_CPU( w1,     double,             N      );
            TESTING_MALLOC_CPU( w2,     double,             N      );
            TESTING_MALLOC_CPU( iwork,  magma_int_t,        liwork );
            
            TESTING_MALLOC_PIN( h_R,    double, n2     );
            TESTING_MALLOC_PIN( h_S,    double, n2     );
            TESTING_MALLOC_PIN( h_work, double, lwork  );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_MALLOC_PIN( rwork, double, lrwork);
            #endif
            
            /* Initialize the matrix */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            lapackf77_dlarnv( &ione, ISEED, &n2, h_B );
            magma_dmake_hpd( N, h_B, N );
            magma_dmake_symmetric( N, h_A, N );

            // ==================================================================
            // Warmup using MAGMA
            // ==================================================================
            if(opts.warmup){
                lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_dsygvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                               h_work, lwork,
                               #if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
                               #endif      
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_dsygvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
            }
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
            lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

            gpu_time = magma_wtime();
            magma_dsygvdx( opts.itype, opts.jobz, MagmaRangeI, opts.uplo,
                           N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                           h_work, lwork,
                           #if defined(PRECISION_z) || defined(PRECISION_c)
                           rwork, lrwork,
                           #endif
                           iwork, liwork,
                           &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_dsygvdx returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zc]hegvdx routine.
                   A x = lambda B x is solved
                   and the following 3 tests computed:
                   (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                          | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                          | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
                   (2)    | S(with V) - S(w/o V) | / | S |
                   =================================================================== */
                #if defined(PRECISION_d) || defined(PRECISION_s)
                double *rwork = h_work + N*N;
                #endif
                double temp1, temp2;
                
                result[0] = 1.;
                result[0] /= lapackf77_dlansy("1", lapack_uplo_const(opts.uplo), &N, h_A, &N, rwork);
                result[0] /= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork);
                
                if (opts.itype == 1) {
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
                    result[0] *= lapackf77_dlange("1", &N, &m1, h_work, &N, rwork)/N;
                }
                else if (opts.itype == 2) {
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                else if (opts.itype == 3) {
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_dscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_dsymm("L", lapack_uplo_const(opts.uplo), &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_dlange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                
                lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_dlacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_dsygvdx( opts.itype, MagmaNoVec, MagmaRangeI, opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2,
                               h_work, lwork,
                               #if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
                               #endif
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_dsygvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                temp1 = temp2 = 0;
                for(int j=0; j < m2; j++) {
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                result[1] = temp2 / (((double)m2)*temp1);
            }
            
            /* =====================================================================
               Print execution time
               =================================================================== */
            printf("%5d %5d   %7.2f\n",
                   (int) N, (int) m1, gpu_time);
            if ( opts.check ) {
                printf("Testing the eigenvalues and eigenvectors for correctness:\n");
                if (opts.itype == 1) {
                    printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                else if (opts.itype == 2) {
                    printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                else if (opts.itype == 3) {
                    printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %8.2e   %s\n",   result[0], (result[0] < tol    ? "ok" : "failed"));
                }
                printf(    "(2)    | D(w/ Z) - D(w/o Z) | / |D|  = %8.2e   %s\n\n", result[1], (result[1] < tolulp ? "ok" : "failed"));
                status += ! (result[0] < tol && result[1] < tolulp);
            }
            
            TESTING_FREE_CPU( h_A );
            TESTING_FREE_CPU( h_B );
            TESTING_FREE_CPU( w1  );
            TESTING_FREE_CPU( w2  );
            TESTING_FREE_CPU( iwork );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_S    );
            TESTING_FREE_PIN( h_work );
            #if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_FREE_PIN( rwork );
            #endif
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
Beispiel #29
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing ssygvdx
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gpu_time /*cpu_time*/;
    float *h_A, *h_R, *h_B, *h_S, *h_work;
    float *w1, *w2, vl=0, vu=0;
    float result[2] = {0};
    magma_int_t *iwork;
    magma_int_t N, n2, info, il, iu, m1, m2, nb, lwork, liwork;
    float c_zero    = MAGMA_S_ZERO;
    float c_one     = MAGMA_S_ONE;
    float c_neg_one = MAGMA_S_NEG_ONE;
#if defined(PRECISION_z) || defined(PRECISION_c)
    float *rwork;
    magma_int_t lrwork;
#endif
    //float d_one         =  1.;
    //float d_ten         = 10.;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};

    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    float tol    = opts.tolerance * lapackf77_slamch("E");
    float tolulp = opts.tolerance * lapackf77_slamch("P");
    
    if ( opts.check && opts.jobz == MagmaNoVec ) {
        fprintf( stderr, "checking results requires vectors; setting jobz=V (option -JV)\n" );
        opts.jobz = MagmaVec;
    }
    
    printf("    N     M   GPU Time (sec)\n");
    printf("============================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            n2     = N*N;
            nb     = magma_get_ssytrd_nb(N);
#if defined(PRECISION_z) || defined(PRECISION_c)
            lwork  = 2*N*nb + N*N;
            lrwork = 1 + 5*N +2*N*N;
#else
            lwork  = 1 + 6*N*nb + 2* N*N;
#endif
            liwork = 3 + 5*N;

            if ( opts.fraction == 0 ) {
                il = N / 10;
                iu = N / 5+il;
            }
            else {
                il = 1;
                iu = (int) (opts.fraction*N);
                if (iu < 1) iu = 1;
            }

            TESTING_MALLOC(    h_A,    float, n2     );
            TESTING_MALLOC(    h_B,    float, n2     );
            TESTING_MALLOC(    w1,     float,          N      );
            TESTING_MALLOC(    w2,     float,          N      );
            TESTING_MALLOC(    iwork,  magma_int_t,     liwork );
            TESTING_HOSTALLOC( h_R,    float, n2     );
            TESTING_HOSTALLOC( h_S,    float, n2     );
            TESTING_HOSTALLOC( h_work, float, lwork  );
#if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_HOSTALLOC( rwork,          float, lrwork);
#endif
            
            /* Initialize the matrix */
            lapackf77_slarnv( &ione, ISEED, &n2, h_A );
            lapackf77_slarnv( &ione, ISEED, &n2, h_B );
            /* increase the diagonal */
            for(int i=0; i<N; i++) {
                MAGMA_S_SET2REAL( h_B[i*N+i], ( MAGMA_S_REAL(h_B[i*N+i]) + 1.*N ) );
                MAGMA_S_SET2REAL( h_A[i*N+i], MAGMA_S_REAL(h_A[i*N+i]) );
            }


            // ==================================================================
            // Warmup using MAGMA
            // ==================================================================
            if(opts.warmup){
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_ssygvdx( opts.itype, opts.jobz, 'I', opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                               h_work, lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
#endif      
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_ssygvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
            }
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
            lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );

            gpu_time = magma_wtime();
            magma_ssygvdx( opts.itype, opts.jobz, 'I', opts.uplo,
                           N, h_R, N, h_S, N, vl, vu, il, iu, &m1, w1,
                           h_work, lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
                           rwork, lrwork,
#endif
                           iwork, liwork,
                           &info );
            gpu_time = magma_wtime() - gpu_time;
            if (info != 0)
                printf("magma_ssygvdx returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            if ( opts.check ) {
                /* =====================================================================
                   Check the results following the LAPACK's [zc]hegvdx routine.
                   A x = lambda B x is solved
                   and the following 3 tests computed:
                   (1)    | A Z - B Z D | / ( |A||Z| N )  (itype = 1)
                          | A B Z - Z D | / ( |A||Z| N )  (itype = 2)
                          | B A Z - Z D | / ( |A||Z| N )  (itype = 3)
                   (2)    | S(with V) - S(w/o V) | / | S |
                   =================================================================== */
#if defined(PRECISION_d) || defined(PRECISION_s)
                float *rwork = h_work + N*N;
#endif
                float temp1, temp2;
                
                result[0] = 1.;
                result[0] /= lapackf77_slansy("1", &opts.uplo, &N, h_A, &N, rwork);
                result[0] /= lapackf77_slange("1", &N, &m1, h_R, &N, rwork);
                
                if (opts.itype == 1) {
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_sscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_neg_one, h_B, &N, h_R, &N, &c_one, h_work, &N);
                    result[0] *= lapackf77_slange("1", &N, &m1, h_work, &N, rwork)/N;
                }
                else if (opts.itype == 2) {
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_one, h_B, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_sscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_one, h_A, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_slange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                else if (opts.itype == 3) {
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_one, h_A, &N, h_R, &N, &c_zero, h_work, &N);
                    for(int i=0; i < m1; ++i)
                        blasf77_sscal(&N, &w1[i], &h_R[i*N], &ione);
                    blasf77_ssymm("L", &opts.uplo, &N, &m1, &c_one, h_B, &N, h_work, &N, &c_neg_one, h_R, &N);
                    result[0] *= lapackf77_slange("1", &N, &m1, h_R, &N, rwork)/N;
                }
                
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_A, &N, h_R, &N );
                lapackf77_slacpy( MagmaUpperLowerStr, &N, &N, h_B, &N, h_S, &N );
                
                magma_ssygvdx( opts.itype, 'N', 'I', opts.uplo,
                               N, h_R, N, h_S, N, vl, vu, il, iu, &m2, w2,
                               h_work, lwork,
#if defined(PRECISION_z) || defined(PRECISION_c)
                               rwork, lrwork,
#endif
                               iwork, liwork,
                               &info );
                if (info != 0)
                    printf("magma_ssygvdx returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                temp1 = temp2 = 0;
                for(int j=0; j < m2; j++) {
                    temp1 = max(temp1, absv(w1[j]));
                    temp1 = max(temp1, absv(w2[j]));
                    temp2 = max(temp2, absv(w1[j]-w2[j]));
                }
                result[1] = temp2 / (((float)m2)*temp1);
            }
            
            /* =====================================================================
               Print execution time
               =================================================================== */
            printf("%5d %5d   %7.2f\n",
                   (int) N, (int) m1, gpu_time);
            if ( opts.check ) {
                printf("Testing the eigenvalues and eigenvectors for correctness:\n");
                if (opts.itype==1)
                    printf("(1)    | A Z - B Z D | / (|A| |Z| N) = %8.2e%s\n", result[0], (result[0] < tol ? "" : "  failed"));
                else if (opts.itype==2)
                    printf("(1)    | A B Z - Z D | / (|A| |Z| N) = %8.2e%s\n", result[0], (result[0] < tol ? "" : "  failed"));
                else if (opts.itype==3)
                    printf("(1)    | B A Z - Z D | / (|A| |Z| N) = %8.2e%s\n", result[0], (result[0] < tol ? "" : "  failed"));
                printf(    "(2)    | D(w/ Z) - D(w/o Z) | / |D|  = %8.2e%s\n\n", result[1], (result[1] < tolulp ? "" : "  failed"));
            }
            
            TESTING_FREE( h_A   );
            TESTING_FREE( h_B   );
            TESTING_FREE( w1    );
            TESTING_FREE( w2    );
#if defined(PRECISION_z) || defined(PRECISION_c)
            TESTING_HOSTFREE( rwork);
#endif
            TESTING_FREE( iwork );
            TESTING_HOSTFREE( h_work );
            TESTING_HOSTFREE( h_R    );
            TESTING_HOSTFREE( h_S    );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return 0;
}
Beispiel #30
0
int berank(const void *p1, const void *p2)
{
  real e1 = 0.5 * rsqr(absv(Vel((bodyptr) p1))) + Phi((bodyptr) p1);
  real e2 = 0.5 * rsqr(absv(Vel((bodyptr) p2))) + Phi((bodyptr) p2);
  return (e1 < e2 ? -1 : e1 > e2 ? 1 : 0);
}