Exemple #1
0
static 
int eigval_subset_proc(proc_t *procinfo, char *range, in_t *Dstruct,
		       double *E2, int ifirst, int ilast, tol_t *tolstruct,
		       val_t *Wstruct, double *work,
		       int *iwork)
{
  /* Input parameter */
  int              max_nthreads = procinfo->nthreads;
  int              n            = Dstruct->n;
  double *restrict D            = Dstruct->D;
  double *restrict E            = Dstruct->E;
  int              nsplit       = Dstruct->nsplit;
  int    *restrict isplit       = Dstruct->isplit;
  int              isize        = ilast-ifirst+1;
  double *restrict W            = Wstruct->W;
  double *restrict Werr         = Wstruct->Werr;
  double *restrict Wgap         = Wstruct->Wgap;
  int    *restrict Windex       = Wstruct->Windex;
  int    *restrict iblock       = Wstruct->iblock;
  double *restrict gersch       = Wstruct->gersch;
  double           pivmin       = tolstruct->pivmin;

  double gl, gu, wl, wu;

  /* Tolerances */
  double bsrtol, rtl;

  /* Multithreading */
  int            nthreads;
  int            iifirst, iilast, chunk;
  pthread_t      *threads;
  pthread_attr_t attr;
  auxarg1_t      *auxarg1;
  auxarg2_t      *auxarg2;
  void           *status;

  /* Create random vector to perturb rrr, same seed */
  int    two_n = 2*n;
  int    iseed[4] = {1,1,1,1};
  double *randvec;

  /* loop over blocks */
  int    jbl, num_vals;
  int    bl_begin,  bl_end, bl_size;
  int    bl_Wbegin, bl_Wend;
  double isleft, isright, spdiam;
  int    i_low, i_upp, bl_m;
  double sigma, s1, s2;
  int    sgndef, cnt, negcnt_lft, negcnt_rgt;
  double tau;

  /* Compute RRR */
  int    jtry, off_L, off_invD;
  double Dpivot, Dmax;
  bool   noREP;

  /* Refine eigenvalues */
  int    off_DE2, offset;
  int    rf_begin, rf_end;

  /* Others */
  int    IONE = 1, ITWO = 2;
  int    info, m, i, j;
  double dummy, tmp, tmp1, tmp2;

  /* Allocate workspace */
  randvec = (double *) malloc( 2*n * sizeof(double) );
  assert(randvec != NULL);

  threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
  assert(threads != NULL);
  
  if (max_nthreads > 1) {
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
  }

  /* Set tolerance parameters */
  bsrtol = sqrt(DBL_EPSILON);
  rtl    = sqrt(DBL_EPSILON);

  /* create random vector to perturb rrr and broadcast it */
  LAPACK(dlarnv)(&ITWO, iseed, &two_n, randvec);

  /* compute approximations of the eigenvalues with muliple threads
   * equivalent to:
   * LAPACK(dlarrd)
   * ("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch,
   *  &bsrtol, D, E, E2, &pivmin, &nsplit, isplit, &m, W, Werr,
   *  &wl, &wu, iblock, Windex, work, iwork, &info);
   * assert(info == 0);
   * assert(m == ilast-ifirst+1); */

  nthreads = max_nthreads;
  while (nthreads > 1 && isize / nthreads < 2)
    nthreads--;

  if (nthreads > 1) {

    /* each threads computes W[iifirst:iilast] and places them in
     * work[0:n-1]; the corresponding errors in work[n:2*n-1];
     * the blocks they belong in iwork[0:n-1]; and their indices in
     * iwork[n:2*n-1]; */ 

    iifirst = ifirst;
    chunk = isize / nthreads;
    for (i=1; i<nthreads; i++) {

      iilast = iifirst + chunk - 1; 
      
      auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
			       nsplit, isplit, bsrtol, pivmin, gersch,
			       &work[0], &work[n], &iwork[n], &iwork[0]);

      info = pthread_create(&threads[i], &attr,
			    eigval_subset_thread_a,
			    (void *) auxarg1);
      assert(info == 0);

      iifirst = iilast + 1;
    }
    iilast = ilast;

    auxarg1 = create_auxarg1(n, D, E, E2, ifirst, ilast, iifirst, iilast,
			     nsplit, isplit, bsrtol, pivmin, gersch,
			     &work[0], &work[n], &iwork[n], &iwork[0]);

    status = eigval_subset_thread_a( (void *) auxarg1 );
    assert(status == NULL);

    /* join threads */
    for (i=1; i<nthreads; i++) {
      info = pthread_join(threads[i], &status);
      assert(info == 0 && status == NULL);
    }

    /* sort, m counts the numbers of eigenvalues computed by process */
    m = 0;
    for (i=1; i<=nsplit; i++) {
      for (j=0; j<isize; j++) {
	if (iwork[j] == i) {
	  W[m]      = work[j];
	  Werr[m]   = work[j+n];
	  iblock[m] = iwork[j];
	  Windex[m] = iwork[j+n];
	  m++;
	}
      }
    }

  } else {
    /* no multithreaded computation */
    LAPACK(dlarrd)
    ("I", "B", &n, &dummy, &dummy, &ifirst, &ilast, gersch, &bsrtol, D, E, E2,
     &pivmin, &nsplit, isplit, &m, W, Werr, &wl, &wu, iblock, Windex, work, 
     iwork, &info);
    assert(info == 0);
    assert(m == ilast-ifirst+1);
  }


  /* loop over unreduced blocks */  
  num_vals  = m;
  m         = 0; /* accumulates eigenvalues found or refined */
  bl_begin  = 0;
  bl_Wbegin = 0;
  
  for (jbl=0; jbl<nsplit; jbl++) {
    
    bl_end  = isplit[jbl] - 1;
    bl_size = bl_end - bl_begin + 1;

    /* deal with 1x1 block immediately */
    if (bl_size == 1) {
      E[bl_end] = 0.0;
      /* if eigenvalue part of process work */
      if (iblock[bl_Wbegin] == jbl+1) {
	W[m]      = D[bl_begin];
	Werr[m]   = 0.0;
	Werr[m]   = 0.0;
	iblock[m] = jbl + 1;
	Windex[m] = 1;
	m++;
	bl_Wbegin++;
      }
      bl_begin  = bl_end + 1;
      continue;
    }


    /* COMPUTE ROOT RRR */

    /* store shift of initial RRR, here set to zero */
    E[bl_end] = 0.0;

    /* find outer bounds GL, GU for block and spectral diameter */
    gl = D[bl_begin];
    gu = D[bl_begin];
    for (i = bl_begin; i <= bl_end; i++) {
      gl = fmin(gl, gersch[2*i]  );
      gu = fmax(gu, gersch[2*i+1]);
    }
    spdiam = gu - gl;

    /* find approximation of extremal eigenvalues of the block
     * dlarrk computes one eigenvalue of tridiagonal matrix T
     * tmp1 and tmp2 one hold the eigenvalue and error, respectively */
    LAPACK(dlarrk)
    (&bl_size, &IONE, &gl, &gu, &D[bl_begin], &E2[bl_begin], &pivmin, &rtl, 
     &tmp1, &tmp2, &info);
    assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
    
    isleft = fmax(gl, tmp1-tmp2 - HUNDRED*DBL_EPSILON*fabs(tmp1-tmp2) );
    
    LAPACK(dlarrk)
    (&bl_size, &bl_size, &gl, &gu, &D[bl_begin], &E2[bl_begin], &pivmin, &rtl,     &tmp1, &tmp2, &info);
    assert(info == 0);  /* if info=-1 => eigenvalue did not converge */
    
    isright = fmin(gu, tmp1+tmp2 + HUNDRED*DBL_EPSILON*fabs(tmp1+tmp2) );
    
    spdiam = isright - isleft;

    /* compute negcount at points s1 and s2 */
    s1 = isleft  + HALF   * spdiam;
    s2 = isright - FOURTH * spdiam;  /* not needed currently */

    /* compute negcount at points s1 and s2 */
    /* cnt = number of eigenvalues in (s1,s2] = count_right - count_left
     * negcnt_lft = number of eigenvalues smaller equals than s1
     * negcnt_rgt = number of eigenvalues smaller equals than s2 */
    LAPACK(dlarrc)
    ("T", &bl_size, &s1, &s2, &D[bl_begin], &E[bl_begin], &pivmin, &cnt, 
     &negcnt_lft, &negcnt_rgt, &info);
    assert(info == 0);

    /* if more of the desired eigenvectors are in the left part shift left
     * and the other way around */
    if ( negcnt_lft >= bl_size - negcnt_lft ) {
      /* shift left */
      sigma = isleft;
      sgndef = ONE;
    } else {
      /* shift right */
      sigma = isright;
      sgndef = -ONE;
    }

    /* define increment to perturb initial shift to find RRR
     * with not too much element growth */
    tau = spdiam*DBL_EPSILON*n + 2.0*pivmin;


    /* try to find initial RRR of block:
     * need work space of 3*n here to store D, L, D^-1 of possible
     * representation:
     * D_try      = work[0  :  n-1] 
     * L_try      = work[n  :2*n-1]
     * inv(D_try) = work[2*n:3*n-1] */

    off_L    = n;
    off_invD = 2*n;
    
    for (jtry = 0; jtry < MAX_TRY_RRRR; jtry++) {

      Dpivot  = D[bl_begin] - sigma;
      work[0] = Dpivot;
      Dmax    = fabs( work[0] );
      j = bl_begin;

      for (i = 0; i < bl_size-1; i++) {
 	work[i+off_invD] = 1.0 / work[i];
	tmp = E[j] * work[i+off_invD];
	work[i+off_L] = tmp;
	Dpivot = (D[j+1] - sigma) - tmp*E[j];
	work[i+1] = Dpivot;
	Dmax = fmax(Dmax, fabs(Dpivot) );
	j++;
      }
      
      /* except representation only if not too much element growth */
      if (Dmax > MAX_GROWTH*spdiam) {
	noREP = true;
      } else {
	noREP = false;
      }
      
      if (noREP == true) {
	/* if all eigenvalues are desired shift is made definite to use DQDS
	 * so we should not end here */
	if (jtry == MAX_TRY_RRRR-2) {
	  if (sgndef == ONE) { /* floating point comparison okay here */
	    sigma = gl - FUDGE_FACTOR*spdiam*DBL_EPSILON*n 
	               - FUDGE_FACTOR*2.0*pivmin;
	  } else {
	    sigma = gu + FUDGE_FACTOR*spdiam*DBL_EPSILON*n 
                       + FUDGE_FACTOR*2.0*pivmin;
	  }
	} else if (jtry == MAX_TRY_RRRR-1) {
	  fprintf(stderr,"No initial representation could be found.\n");
	  exit(3);
	} else {
	  sigma -= sgndef*tau;
	  tau   *= 2.0;
	  continue;
	}
      } else {   /* found representation */
	break;
      }  
    }
    /* end trying to find initial RRR of block */


    /* save initial RRR and corresponding shift */
    E[bl_end] = sigma;
    memcpy(&D[bl_begin], &work[0],  bl_size    * sizeof(double) );
    memcpy(&E[bl_begin], &work[n], (bl_size-1) * sizeof(double) );
    /* work[0:4*n-1] can now be used again for anything */


    /* perturb root rrr by small relative amount, first make sure
     * that at least two values are actually disturbed enough,
     * which might not be necessary */
    while( fabs(randvec[bl_begin])*RAND_FACTOR < 1.0 )
      randvec[bl_begin] *= 2.0;
    while( fabs(randvec[bl_end])  *RAND_FACTOR < 1.0 )
      randvec[bl_end]   *= 2.0;

    for (i=bl_begin; i<bl_end; i++) {
      D[i] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[i];
      E[i] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[i+n];
    }
    D[bl_end] *= 1.0 + DBL_EPSILON*RAND_FACTOR*randvec[bl_end];


    /* REFINE EIGENVALUES WITH REPECT TO RRR */

    /* count number of eigenvalues in block and find smallest
     * and largest index of block */
    bl_m  = 0;
    i_low = n;
    i_upp = 1;
    for (i=bl_Wbegin; i<num_vals; i++) {
      if (iblock[i] == jbl+1)  {
	bl_m++;
	i_low = imin(i_low, Windex[i]);
	i_upp = imax(i_upp, Windex[i]);
      } else {
	break;
      }
    }

    if (bl_m == 0) {
      bl_begin  = bl_end + 1;
      continue; /* go to next block */
    }

    /* last index of W to store eigenvalues of block */
    bl_Wend = bl_Wbegin + bl_m - 1;

    /* calculate gaps */
    for (i=bl_Wbegin; i<bl_Wend; i++) {
      Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
    }	
    
    Wgap[bl_Wend] = fmax(0.0, gu - (W[bl_Wend] + Werr[bl_Wend]) );
    
    /* shift eigenvalues to be consistent with dqds 
     * and compute eigenvalues of SHIFTED matrix */
    for (i=bl_Wbegin; i<=bl_Wend; i++) {
      W[i]    -= sigma;
      Werr[i] += fabs(W[i])*DBL_EPSILON;
    }

    /* work  for sequential dlarrb = work[0:2*n-1]
     * iwork for sequential dlarrb = iwork[0:2*n-1]
     * DE2 = work[2*n:3*n-1] strting at bl_begin */
    off_DE2 = 2*n;
    
    /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
    for (i=bl_begin; i<bl_end; i++) {
      work[i+off_DE2] = D[i]*E[i]*E[i];
    }
    
    nthreads = max_nthreads;
    while (nthreads > 1 && bl_m/nthreads < 2) {
      nthreads--;
    }

    if (nthreads > 1) {

      rf_begin = bl_Wbegin;
      chunk    = bl_m / nthreads;
      for (i=1; i<nthreads; i++) {
	
	rf_end = rf_begin + chunk - 1; 

	auxarg2 = create_auxarg2(bl_size, &D[bl_begin],
				 &work[bl_begin+off_DE2],
				 rf_begin, rf_end, Wstruct,
				 tolstruct->rtol1, tolstruct->rtol2,
				 pivmin, spdiam);
	
	info = pthread_create(&threads[i], &attr,
			      eigval_subset_thread_r,
			      (void *) auxarg2);
	assert(info == 0);
	
	rf_begin = rf_end + 1;
      }
      rf_end = bl_Wend;

      auxarg2 = create_auxarg2(bl_size, &D[bl_begin],
			       &work[bl_begin+off_DE2],
			       rf_begin, rf_end, Wstruct,
			       tolstruct->rtol1, tolstruct->rtol2,
			       pivmin, spdiam);
      
      status = eigval_subset_thread_r( (void *) auxarg2 );
      assert(status == NULL);
    
      /* join threads */
      for (i=1; i<nthreads; i++) {
	info = pthread_join(threads[i], &status);
	assert(info == 0 && status == NULL);
      }
      /* should update gaps at splitting points here, but the gaps
       * will be recomputed anyway */
      
    } else {

      offset = i_low-1;
      
      /* refine eigenvalues found by dlarrd for i_low:i_upp */
      LAPACK(dlarrb)
      (&bl_size, &D[bl_begin], &work[bl_begin+off_DE2], &i_low, &i_upp, 
       &tolstruct->rtol1, &tolstruct->rtol2, &offset, &W[bl_Wbegin], 
       &Wgap[bl_Wbegin], &Werr[bl_Wbegin], work, iwork, &pivmin, &spdiam, 
       &bl_size, &info);
      assert(info == 0);
      /* needs work of dim(2*n) and iwork of dim(2*n) */
    }
    /* dlarrb computes gaps correctly, but not last one;
     * this is ignored since the gaps are recomputed anyway */
    
    /* this makes sure that the indices are in the right order */
    for (i=i_low; i<=i_upp; i++) {
      Windex[m] = i;
      m++;
    }

    /* proceed with next block */
    bl_begin  = bl_end  + 1;
    bl_Wbegin = bl_Wend + 1;
  }
  /* end of loop over unreduced blocks */  
  
  /* clean up */
  free(randvec);
  free(threads);

  if (max_nthreads > 1) {
    pthread_attr_destroy(&attr);
  }

  return(0);
}
Exemple #2
0
static 
int eigval_refine_proc
(proc_t *procinfo, int ifirst, int ilast, 
 int n, double *D, double *E, double *E2,  
 int *Windex, int *iblock, double *gersch, tol_t *tolstruct, 
 double *W, double *Werr, double *Wgap, double *work, int *iwork)
{
  /* Input parameter */
  int    isize  = ilast-ifirst+1;
  double pivmin = tolstruct->pivmin;

  /* Multithreading */
  int            nthreads;
  int            max_nthreads = procinfo->nthreads;
  int            chunk;
  pthread_t      *threads;
  pthread_attr_t attr;
  auxarg2_t      *auxarg2;

  int    info, i;

  /* Allocate space */
  threads = (pthread_t *) malloc( max_nthreads * sizeof(pthread_t) );
  assert(threads != NULL);
  int *isplit = (int *) malloc( n * sizeof(int) );
  assert(isplit != NULL);

  /* This is an unreduced block (nsplit=1) */
  isplit[0] = n;
  
  /* Prepare multi-threading */
  if (max_nthreads > 1) {
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
  }

  /* find outer bounds GL, GU for block and spectral diameter */
  double gl = D[0];
  double gu = D[0];
  for (i = 0; i < n; i++) {
    gl = fmin(gl, gersch[2*i]  );
    gu = fmax(gu, gersch[2*i+1]);
  }
  double spdiam = gu - gl;

  /* REFINE EIGENVALUES i_low:i_upp WITH REPECT TO RRR */
  
  int i_low = Windex[0];
  int i_upp = Windex[isize-1];
  double sigma = E[n-1];

  /* calculate gaps */
  for (i=0; i<isize-1; i++) {
    Wgap[i] = fmax(0.0, (W[i+1] - Werr[i+1]) - (W[i] + Werr[i]) );
  }
  Wgap[isize-1] = fmax(0.0, gu - (W[isize-1] + Werr[isize-1]) );
    
  /* shift eigenvalues to be consistent with dqds
   * and compute eigenvalues of SHIFTED matrix */
  for (i=0; i<isize; i++) {
    W[i]    -= sigma;
    Werr[i] += fabs(W[i])*DBL_EPSILON;
  }

  /* work  for sequential odrrb = work[0:2*n-1]
   * iwork for sequential odrrb = iwork[0:2*n-1]
   * DE2 = work[2*n:3*n-1] strting at bl_begin */
  int off_DE2 = 2*n;
    
  /* compute DE2 at store it in work[bl_begin+2*n:bl_end-1+2*n] */
  for (i=0; i<n; i++) {
    work[i+off_DE2] = D[i]*E[i]*E[i];
  }
    
  nthreads = max_nthreads;
  while (nthreads > 1 && isize/nthreads < 2) {
    nthreads--;
  }

  if (nthreads > 1) {

    int rf_begin=0, rf_end;
    chunk    = isize / nthreads;
    for (i=1; i<nthreads; i++) {
      
      rf_end = rf_begin + chunk - 1;
            
      auxarg2 = create_auxarg2(n, D,
			       &work[off_DE2],
			       rf_begin, rf_end, W, Werr, Wgap, Windex,
			       tolstruct->rtol1, tolstruct->rtol2,
			       pivmin, spdiam);
      
      info = pthread_create(&threads[i], &attr,
  			      eigval_subset_thread_r,
			    (void *) auxarg2);
      assert(info == 0);
      
      rf_begin = rf_end + 1;
    }
    rf_end = isize-1;

    auxarg2 = create_auxarg2(n, D,
			     &work[off_DE2],
			     rf_begin, rf_end, W, Werr, Wgap, Windex,
			     tolstruct->rtol1, tolstruct->rtol2,
			     pivmin, spdiam);
      
    void *status = eigval_subset_thread_r( (void *) auxarg2 );
    assert(status == NULL);
    
    /* join threads */
    for (i=1; i<nthreads; i++) {
      info = pthread_join(threads[i], &status);
      assert(info == 0 && status == NULL);
    }
    /* should update gaps at splitting points here, but the gaps
     * will be recomputed anyway */
      
  } else {
    
    int offset = i_low-1;
    
    /* refine eigenvalues found by odrrb for i_low:i_upp */
    odrrb(&n, D, &work[off_DE2], &i_low,
	  &i_upp, &tolstruct->rtol1, &tolstruct->rtol2, &offset, W, Wgap, 
	  Werr, work, iwork, &pivmin, &spdiam, &n, &info);
    assert(info == 0);
    /* needs work of dim(2*n) and iwork of dim(2*n) */
  }
  /* odrrb computes gaps correctly, but not last one;
   * this is ignored since the gaps are recomputed anyway */
  
  /* clean up */
  free(threads);
  free(isplit);
  
  if (max_nthreads > 1) {
    pthread_attr_destroy(&attr);
  }
  
  return 0;
}