예제 #1
0
int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo,
		       val_t *Wstruct, vec_t *Zstruct, 
		       tol_t *tolstruct, counter_t *num_left, 
		       double *work, int *iwork)
{
  /* Inputs */
  int    begin         = sng->begin; 
  int    end           = sng->end;
  int    bl_begin      = sng->bl_begin;
  int    bl_end        = sng->bl_end;
  int    bl_size       = bl_end - bl_begin + 1;
  double bl_spdiam     = sng->bl_spdiam; 
  rrr_t  *RRR          = sng->RRR;
  double *restrict D   = RRR->D; 
  double *restrict L   = RRR->L; 
  double *restrict DL  = RRR->DL;
  double *restrict DLL = RRR->DLL;

  int              pid      = procinfo->pid;  
  int              n        = Wstruct->n;
  double *restrict W        = Wstruct->W;
  double *restrict Werr     = Wstruct->Werr;
  double *restrict Wgap     = Wstruct->Wgap;
  int    *restrict Windex   = Wstruct->Windex;  
  int    *restrict iproc    = Wstruct->iproc;  
  double *restrict Wshifted = Wstruct->Wshifted;
  int              ldz      = Zstruct->ldz;
  double *restrict Z        = Zstruct->Z;
  int    *restrict isuppZ   = Zstruct->Zsupp;;
  int    *restrict Zindex   = Zstruct->Zindex;
  double           pivmin   = tolstruct->pivmin;

  /* others */
  int              info, i, k, itmp, num_decrement=0;
  int              IONE = 1;
  double           DZERO = 0.0;
  double           tol, lambda, left, right;
  int              i_local, zind;
  double           gap, lgap, rgap, gaptol, savedgap, tmp;
  int             usedBS, usedRQ, needBS, wantNC, step2II;
  int              r, offset;
  double           twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON;
  double           residual, bstres, bstw; 
  int              i_supmn, i_supmx;
  double           RQcorr;
  int              negcount;
  int              sgndef, suppsize;
  double           sigma;
  int              i_Zfrom, i_Zto;
  double           ztz, norminv, mingma;


  /* set tolerance parameter */
  tol  = 4.0 * log( (double) bl_size ) * DBL_EPSILON;

  /* loop over all singletons in the task */
  for (i=begin; i<=end; i++) {

    /* check if eigenvector is supposed to be computed by
     * the process */
    if (iproc[i] != pid)
      continue;
    num_decrement++;

    if (bl_size == 1) {
      /* set eigenvector to column of identity matrix */
      zind = Zindex[i];
      memset(&Z[zind*ldz], 0.0, n*sizeof(double) );
      Z[zind*ldz + bl_begin] = 1.0;
      isuppZ[2*zind    ]     = bl_begin + 1;
      isuppZ[2*zind + 1]     = bl_begin + 1;
      continue;
    }

    lambda  = Wshifted[i];  
    left    = Wshifted[i] - Werr[i];
    right   = Wshifted[i] + Werr[i];
    i_local = Windex[i];
    r       = 0;
    
    /* compute left and right gap */
    if (i == bl_begin)
      lgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
    else if (i == begin)
      lgap = sng->lgap;
    else
      lgap = Wgap[i-1];

    if (i == bl_end) {
      rgap = DBL_EPSILON * fmax( fabs(left), fabs(right) );
    } else {
      rgap = Wgap[i];
    }

    gap = fmin(lgap, rgap);

    if ( i == bl_begin || i == bl_end ) {
      gaptol = 0.0;
    } else {
      gaptol = gap * DBL_EPSILON;
    }

    /* initialize lower and upper value of support */
    i_supmn = bl_size;
    i_supmx = 1;

    /* update Wgap so that it holds minimum gap and save the 
     * old value */
    savedgap  = Wgap[i];
    Wgap[i]   = gap;
    
    /* initialize flags indicating if bisection or Rayleigh-Quotient
     * correction was used */
    usedBS = false;
    usedRQ = false;
  
    /* the need for bisection is initially turned off */
    needBS = !TRY_RQC;

    /* IEEE floating point is assumed, so that all 0 bits are 0.0 */
    zind = Zindex[i];
    memset(&Z[zind*ldz], 0.0, n*sizeof(double));

    /* inverse iteration with twisted factorization */
    for (k=1; k<=MAXITER; k++) {

      if (needBS == true) {
	usedBS = true;
	itmp   = r;
	
	offset  = Windex[i] - 1;
	tmp     = Wgap[i]; 
	Wgap[i] = 0.0;
	
	odrrb_(&bl_size, D, DLL, &i_local, &i_local, &DZERO, 
		&twoeps, &offset, &Wshifted[i], &Wgap[i],
		&Werr[i], work, iwork, &pivmin, &bl_spdiam,
		&itmp, &info);
	assert(info == 0);
	
	Wgap[i] = tmp;
	lambda = Wshifted[i];
	r = 0;
      }
      wantNC = (usedBS == true) ? false : true;

      /* compute the eigenvector corresponding to lambda */
      odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
	      &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
	      &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
	      &norminv, &residual, &RQcorr, work);

      if (k == 1) {
	bstres = residual;
	bstw   = lambda;
      } else if (residual < bstres) {
	bstres = residual;
	bstw   = lambda;
      }
      
      /* update support held */
      i_supmn = imin(i_supmn, isuppZ[2*zind    ]);
      i_supmx = imax(i_supmx, isuppZ[2*zind + 1]);

      /* Convergence test for Rayleigh Quotient Iteration
       * not done if bisection was used */
      if ( !usedBS && residual > tol*gap 
	   && fabs(RQcorr) > RQtol*fabs(lambda) ) {
      
	if (i_local <= negcount) {
	  sgndef = -1;    /* wanted eigenvalue lies to the left  */
	} else {
	  sgndef =  1;    /* wanted eigenvalue lies to the right */
	}
	
	if ( RQcorr*sgndef >= 0.0
	     && lambda+RQcorr <= right 
	     && lambda+RQcorr >= left ) {
	  usedRQ = true;
	  if ( sgndef == 1 )
	    left  = lambda;
	  else
	    right = lambda;
	  Wshifted[i] = 0.5*(left + right);
	  lambda     += RQcorr;
	} else { /* bisection is needed */
	  needBS = true;
	}
	
	if ( right-left < RQtol*fabs(lambda) ) {
	  /* eigenvalue computed to bisection accuracy
	   * => compute eigenvector */
	  usedBS = true;
	} else if ( k == MAXITER-1 ) {
	  /* for last iteration use bisection */
	  needBS = true;
	}
      } else {
	/* go to next iteration */
	break;
      }

    } /* end k */

    /* if necessary call odr1v to improve error angle by 2nd step */
    step2II = false;
    if ( usedRQ && usedBS && (bstres <= residual) ) {
      lambda = bstw;
      step2II = true;
    }
    if ( step2II == true ) {
      odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL,
	      &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC,
	      &negcount, &ztz, &mingma, &r, &isuppZ[2*zind],
	      &norminv, &residual, &RQcorr, work);
    }
    Wshifted[i] = lambda;

    /* compute support w.r.t. whole matrix
     * block beginning is offset for each support */
    isuppZ[2*zind    ] += bl_begin;
    isuppZ[2*zind + 1] += bl_begin;
  
    /* ensure vector is okay if support changed in RQI 
     * minus ones because of indices starting from zero */
    i_Zfrom    = isuppZ[2*zind    ] - 1;
    i_Zto      = isuppZ[2*zind + 1] - 1;
    i_supmn   += bl_begin - 1;
    i_supmx   += bl_begin - 1;
    if ( i_supmn < i_Zfrom ) {
      for ( k=i_supmn; k < i_Zfrom; k++ ) {
	Z[k + zind*ldz] = 0.0;
      }
    }
    if ( i_supmx > i_Zto ) {
      for ( k=i_Zto+1; k <= i_supmx; k++ ) {
	Z[k + zind*ldz] = 0.0;
      }
    }
    
    /* normalize eigenvector */
    suppsize = i_Zto - i_Zfrom + 1;
    odscl_(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE);

    sigma = L[bl_size-1];
    W[i]  = lambda + sigma;
    
    if (i < end)
      Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]);

  } /* end i */

  /* decrement counter */
  PMR_decrement_counter(num_left, num_decrement);

  /* clean up */
  free(sng);
  PMR_try_destroy_rrr(RRR);

  return(0);
}
예제 #2
0
int PMR_process_r_task(refine_t *rf, int tid, val_t *Wstruct, 
		       tol_t *tolstruct, double *work, int *iwork)
{
  int    rf_begin  = rf->begin;
  double *D        = rf->D;
  double *DLL      = rf->DLL;
  int    p         = rf->p;
  int    q         = rf->q;
  int    bl_size   = rf->bl_size;
  double bl_spdiam = rf->bl_spdiam;
  subtasks_t  *sts = rf->sts;

  double *Wshifted = Wstruct->Wshifted;
  double *Werr     = Wstruct->Werr;
  double *Wgap     = Wstruct->Wgap;
  int    *Windex   = Wstruct->Windex;
  double rtol1     = tolstruct->rtol1;
  double rtol2     = tolstruct->rtol2;
  double pivmin    = tolstruct->pivmin;

  /* Others */
  int info, offset, taskcount, rf_end, i;
  double sigma;
  double *restrict L;
  double *restrict W;

  offset = Windex[rf_begin] - 1;

  /* Bisection to refine the eigenvalues */
  dlarrb_(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2,
	  &offset, &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin],
	  work, iwork, &pivmin, &bl_spdiam, &bl_size, &info);
  assert(info == 0);

  taskcount = PMR_decrement_counter(sts->counter, 1);
  
  if (taskcount == 0) {
    L = sts->RRR->L;
    W = Wstruct->W;
    rf_begin = sts->cl->begin;
    for (i=0; i<sts->num_tasks; i++) {
      rf_end = rf_begin + sts->chunk - 1;
      
      Wgap[rf_end] = Wshifted[rf_end + 1] - Werr[rf_end + 1]
	- Wshifted[rf_end] - Werr[rf_end];
      
      rf_begin = rf_end + 1;
    }
    sigma = L[bl_size-1];
    
    /* refined eigenvalues with all shifts applied in W */
    for ( i=sts->cl->begin; i<=sts->cl->end; i++ ) {
      W[i] = Wshifted[i] + sigma;
    }
    
    /* create subtasks */
    info = PMR_create_subtasks(sts->cl, tid, sts->nthreads, sts->num_left, 
			       sts->workQ, sts->RRR, Wstruct, 
			       sts->Zstruct, tolstruct, work, iwork);
    assert(info == 0);

    PMR_destroy_counter(sts->counter); 
    free(sts);
  }
  
  free(rf);

  return(0);
}