int PMR_process_s_task(singleton_t *sng, int tid, proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct, counter_t *num_left, double *work, int *iwork) { /* Inputs */ int begin = sng->begin; int end = sng->end; int bl_begin = sng->bl_begin; int bl_end = sng->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = sng->bl_spdiam; rrr_t *RRR = sng->RRR; double *restrict D = RRR->D; double *restrict L = RRR->L; double *restrict DL = RRR->DL; double *restrict DLL = RRR->DLL; int pid = procinfo->pid; int n = Wstruct->n; double *restrict W = Wstruct->W; double *restrict Werr = Wstruct->Werr; double *restrict Wgap = Wstruct->Wgap; int *restrict Windex = Wstruct->Windex; int *restrict iproc = Wstruct->iproc; double *restrict Wshifted = Wstruct->Wshifted; int ldz = Zstruct->ldz; double *restrict Z = Zstruct->Z; int *restrict isuppZ = Zstruct->Zsupp;; int *restrict Zindex = Zstruct->Zindex; double pivmin = tolstruct->pivmin; /* others */ int info, i, k, itmp, num_decrement=0; int IONE = 1; double DZERO = 0.0; double tol, lambda, left, right; int i_local, zind; double gap, lgap, rgap, gaptol, savedgap, tmp; int usedBS, usedRQ, needBS, wantNC, step2II; int r, offset; double twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON; double residual, bstres, bstw; int i_supmn, i_supmx; double RQcorr; int negcount; int sgndef, suppsize; double sigma; int i_Zfrom, i_Zto; double ztz, norminv, mingma; /* set tolerance parameter */ tol = 4.0 * log( (double) bl_size ) * DBL_EPSILON; /* loop over all singletons in the task */ for (i=begin; i<=end; i++) { /* check if eigenvector is supposed to be computed by * the process */ if (iproc[i] != pid) continue; num_decrement++; if (bl_size == 1) { /* set eigenvector to column of identity matrix */ zind = Zindex[i]; memset(&Z[zind*ldz], 0.0, n*sizeof(double) ); Z[zind*ldz + bl_begin] = 1.0; isuppZ[2*zind ] = bl_begin + 1; isuppZ[2*zind + 1] = bl_begin + 1; continue; } lambda = Wshifted[i]; left = Wshifted[i] - Werr[i]; right = Wshifted[i] + Werr[i]; i_local = Windex[i]; r = 0; /* compute left and right gap */ if (i == bl_begin) lgap = DBL_EPSILON * fmax( fabs(left), fabs(right) ); else if (i == begin) lgap = sng->lgap; else lgap = Wgap[i-1]; if (i == bl_end) { rgap = DBL_EPSILON * fmax( fabs(left), fabs(right) ); } else { rgap = Wgap[i]; } gap = fmin(lgap, rgap); if ( i == bl_begin || i == bl_end ) { gaptol = 0.0; } else { gaptol = gap * DBL_EPSILON; } /* initialize lower and upper value of support */ i_supmn = bl_size; i_supmx = 1; /* update Wgap so that it holds minimum gap and save the * old value */ savedgap = Wgap[i]; Wgap[i] = gap; /* initialize flags indicating if bisection or Rayleigh-Quotient * correction was used */ usedBS = false; usedRQ = false; /* the need for bisection is initially turned off */ needBS = !TRY_RQC; /* IEEE floating point is assumed, so that all 0 bits are 0.0 */ zind = Zindex[i]; memset(&Z[zind*ldz], 0.0, n*sizeof(double)); /* inverse iteration with twisted factorization */ for (k=1; k<=MAXITER; k++) { if (needBS == true) { usedBS = true; itmp = r; offset = Windex[i] - 1; tmp = Wgap[i]; Wgap[i] = 0.0; odrrb_(&bl_size, D, DLL, &i_local, &i_local, &DZERO, &twoeps, &offset, &Wshifted[i], &Wgap[i], &Werr[i], work, iwork, &pivmin, &bl_spdiam, &itmp, &info); assert(info == 0); Wgap[i] = tmp; lambda = Wshifted[i]; r = 0; } wantNC = (usedBS == true) ? false : true; /* compute the eigenvector corresponding to lambda */ odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], &norminv, &residual, &RQcorr, work); if (k == 1) { bstres = residual; bstw = lambda; } else if (residual < bstres) { bstres = residual; bstw = lambda; } /* update support held */ i_supmn = imin(i_supmn, isuppZ[2*zind ]); i_supmx = imax(i_supmx, isuppZ[2*zind + 1]); /* Convergence test for Rayleigh Quotient Iteration * not done if bisection was used */ if ( !usedBS && residual > tol*gap && fabs(RQcorr) > RQtol*fabs(lambda) ) { if (i_local <= negcount) { sgndef = -1; /* wanted eigenvalue lies to the left */ } else { sgndef = 1; /* wanted eigenvalue lies to the right */ } if ( RQcorr*sgndef >= 0.0 && lambda+RQcorr <= right && lambda+RQcorr >= left ) { usedRQ = true; if ( sgndef == 1 ) left = lambda; else right = lambda; Wshifted[i] = 0.5*(left + right); lambda += RQcorr; } else { /* bisection is needed */ needBS = true; } if ( right-left < RQtol*fabs(lambda) ) { /* eigenvalue computed to bisection accuracy * => compute eigenvector */ usedBS = true; } else if ( k == MAXITER-1 ) { /* for last iteration use bisection */ needBS = true; } } else { /* go to next iteration */ break; } } /* end k */ /* if necessary call odr1v to improve error angle by 2nd step */ step2II = false; if ( usedRQ && usedBS && (bstres <= residual) ) { lambda = bstw; step2II = true; } if ( step2II == true ) { odr1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], &norminv, &residual, &RQcorr, work); } Wshifted[i] = lambda; /* compute support w.r.t. whole matrix * block beginning is offset for each support */ isuppZ[2*zind ] += bl_begin; isuppZ[2*zind + 1] += bl_begin; /* ensure vector is okay if support changed in RQI * minus ones because of indices starting from zero */ i_Zfrom = isuppZ[2*zind ] - 1; i_Zto = isuppZ[2*zind + 1] - 1; i_supmn += bl_begin - 1; i_supmx += bl_begin - 1; if ( i_supmn < i_Zfrom ) { for ( k=i_supmn; k < i_Zfrom; k++ ) { Z[k + zind*ldz] = 0.0; } } if ( i_supmx > i_Zto ) { for ( k=i_Zto+1; k <= i_supmx; k++ ) { Z[k + zind*ldz] = 0.0; } } /* normalize eigenvector */ suppsize = i_Zto - i_Zfrom + 1; odscl_(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE); sigma = L[bl_size-1]; W[i] = lambda + sigma; if (i < end) Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]); } /* end i */ /* decrement counter */ PMR_decrement_counter(num_left, num_decrement); /* clean up */ free(sng); PMR_try_destroy_rrr(RRR); return(0); }
int PMR_process_r_task(refine_t *rf, int tid, val_t *Wstruct, tol_t *tolstruct, double *work, int *iwork) { int rf_begin = rf->begin; double *D = rf->D; double *DLL = rf->DLL; int p = rf->p; int q = rf->q; int bl_size = rf->bl_size; double bl_spdiam = rf->bl_spdiam; subtasks_t *sts = rf->sts; double *Wshifted = Wstruct->Wshifted; double *Werr = Wstruct->Werr; double *Wgap = Wstruct->Wgap; int *Windex = Wstruct->Windex; double rtol1 = tolstruct->rtol1; double rtol2 = tolstruct->rtol2; double pivmin = tolstruct->pivmin; /* Others */ int info, offset, taskcount, rf_end, i; double sigma; double *restrict L; double *restrict W; offset = Windex[rf_begin] - 1; /* Bisection to refine the eigenvalues */ dlarrb_(&bl_size, D, DLL, &p, &q, &rtol1, &rtol2, &offset, &Wshifted[rf_begin], &Wgap[rf_begin], &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); assert(info == 0); taskcount = PMR_decrement_counter(sts->counter, 1); if (taskcount == 0) { L = sts->RRR->L; W = Wstruct->W; rf_begin = sts->cl->begin; for (i=0; i<sts->num_tasks; i++) { rf_end = rf_begin + sts->chunk - 1; Wgap[rf_end] = Wshifted[rf_end + 1] - Werr[rf_end + 1] - Wshifted[rf_end] - Werr[rf_end]; rf_begin = rf_end + 1; } sigma = L[bl_size-1]; /* refined eigenvalues with all shifts applied in W */ for ( i=sts->cl->begin; i<=sts->cl->end; i++ ) { W[i] = Wshifted[i] + sigma; } /* create subtasks */ info = PMR_create_subtasks(sts->cl, tid, sts->nthreads, sts->num_left, sts->workQ, sts->RRR, Wstruct, sts->Zstruct, tolstruct, work, iwork); assert(info == 0); PMR_destroy_counter(sts->counter); free(sts); } free(rf); return(0); }