/* * Processes all the tasks put in the work queue. */ static void *empty_workQ(void *argin) { int tid; proc_t *procinfo; val_t *Wstruct; vec_t *Zstruct; tol_t *tolstruct; workQ_t *workQ; counter_t *num_left; retrieve_auxarg3 ((auxarg3_t*)argin, &tid, &procinfo, &Wstruct, &Zstruct, &tolstruct, &workQ, &num_left); int n = Wstruct->n; /* max. needed double precision work space: odr1v */ double *work = (double*)malloc(4*n*sizeof(double)); assert(work != NULL); /* max. needed double precision work space: odrrb */ int *iwork = (int*)malloc(2*n*sizeof(int)); assert(iwork != NULL); /* while loop to empty the work queue */ while (PMR_get_counter_value(num_left) > 0) { /* empty r-queue before processing other tasks */ PMR_process_r_queue (tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); task_t *task = PMR_remove_task_at_front(workQ->s_queue); if ( task != NULL ) { assert(task->flag == SINGLETON_TASK_FLAG); PMR_process_s_task ((singleton_t*)task->data, tid, procinfo, Wstruct, Zstruct, tolstruct, num_left, work, iwork); free(task); continue; } task = PMR_remove_task_at_front(workQ->c_queue); if ( task != NULL ) { assert(task->flag == CLUSTER_TASK_FLAG); PMR_process_c_task ((cluster_t*)task->data, tid, procinfo, Wstruct, Zstruct, tolstruct, workQ, num_left, work, iwork); free(task); continue; } } /* end while */ free(work); free(iwork); return NULL; }
int PMR_process_s_task(singleton_t *sng, int tid, counter_t *num_left, workQ_t *workQ, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct, double *work, int *iwork) { /* Inputs */ int begin = sng->begin; int end = sng->end; int size = end - begin + 1; int bl_begin = sng->bl_begin; int bl_end = sng->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = sng->bl_spdiam; rrr_t *RRR = sng->RRR; double *restrict D = RRR->D; double *restrict L = RRR->L; double *restrict DL = RRR->DL; double *restrict DLL = RRR->DLL; int n = Wstruct->n; double *restrict W = Wstruct->W; double *restrict Werr = Wstruct->Werr; double *restrict Wgap = Wstruct->Wgap; int *restrict Windex = Wstruct->Windex; double *restrict Wshifted = Wstruct->Wshifted; int ldz = Zstruct->ldz; double *restrict Z = Zstruct->Z; int *restrict isuppZ = Zstruct->Zsupp;; int *restrict Zindex = Zstruct->Zindex; double pivmin = tolstruct->pivmin; /* others */ int info, i, k, itmp; int IONE = 1; double DZERO = 0.0; double tol, lambda, left, right; int i_local; size_t zind; double gap, lgap, rgap, gaptol, savedgap, tmp; bool usedBS, usedRQ, needBS, wantNC, step2II; int r, offset; double twoeps = 2*DBL_EPSILON, RQtol = 2*DBL_EPSILON; double residual, bstres, bstw; int i_supmn, i_supmx; double RQcorr; int negcount; int sgndef, suppsize; double sigma; int i_Zfrom, i_Zto; double ztz, norminv, mingma; int count=1; /* set tolerance parameter */ tol = 4.0 * log( (double) bl_size ) * DBL_EPSILON; /* decrement counter */ PMR_decrement_counter(num_left, size); /* loop over all singletons in the task */ for (i=begin; i<=end; i++) { if (bl_size == 1) { /* set eigenvector to column of identity matrix */ zind = Zindex[i]; memset(&Z[zind*ldz], 0.0, n*sizeof(double) ); Z[zind*ldz + bl_begin] = 1.0; isuppZ[2*zind ] = bl_begin + 1; isuppZ[2*zind + 1] = bl_begin + 1; continue; } lambda = Wshifted[i]; left = Wshifted[i] - Werr[i]; right = Wshifted[i] + Werr[i]; i_local = Windex[i]; r = 0; /* compute left and right gap */ if (i == bl_begin) lgap = DBL_EPSILON * fmax(fabs(left), fabs(right)); else if (i == begin) lgap = sng->lgap; else lgap = Wgap[i-1]; if (i == bl_end) { rgap = DBL_EPSILON * fmax(fabs(left), fabs(right)); } else { rgap = Wgap[i]; } gap = fmin(lgap, rgap); if ( i == bl_begin || i == bl_end ) { gaptol = 0.0; } else { gaptol = gap * DBL_EPSILON; } /* initialize lower and upper value of support */ i_supmn = bl_size; i_supmx = 1; /* update Wgap so that it holds minimum gap and save the * old value */ savedgap = Wgap[i]; Wgap[i] = gap; /* initialize flags indicating if bisection or Rayleigh-Quotient * correction was used */ usedBS = false; usedRQ = false; /* the need for bisection is initially turned off */ needBS = !TRY_RQC; /* IEEE floating point is assumed, so that all 0 bits are 0.0 */ zind = Zindex[i]; memset(&Z[zind*ldz], 0.0, n*sizeof(double)); /* inverse iteration with twisted factorization */ for (k=1; k<=MAXITER; k++) { if (needBS == true) { usedBS = true; itmp = r; offset = Windex[i] - 1; tmp = Wgap[i]; Wgap[i] = 0.0; dlarrb_(&bl_size, D, DLL, &i_local, &i_local, &DZERO, &twoeps, &offset, &Wshifted[i], &Wgap[i], &Werr[i], work, iwork, &pivmin, &bl_spdiam, &itmp, &info); assert(info == 0); Wgap[i] = tmp; lambda = Wshifted[i]; r = 0; } wantNC = (usedBS == true) ? false : true; /* compute the eigenvector corresponding to lambda */ dlar1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], &norminv, &residual, &RQcorr, work); if (k == 1) { bstres = residual; bstw = lambda; } else if (residual < bstres) { bstres = residual; bstw = lambda; } /* update support held */ i_supmn = imin(i_supmn, isuppZ[2*zind ]); i_supmx = imax(i_supmx, isuppZ[2*zind + 1]); /* Convergence test for Rayleigh Quotient Iteration * not done if bisection was used */ if ( !usedBS && residual > tol*gap && fabs(RQcorr) > RQtol*fabs(lambda) ) { if (i_local <= negcount) { sgndef = -1; /* wanted eigenvalue lies to the left */ } else { sgndef = 1; /* wanted eigenvalue lies to the right */ } if ( RQcorr*sgndef >= 0.0 && lambda+RQcorr <= right && lambda+RQcorr >= left ) { usedRQ = true; if ( sgndef == 1 ) left = lambda; else right = lambda; Wshifted[i] = 0.5*(left + right); lambda += RQcorr; } else { /* bisection is needed */ needBS = true; } if ( right-left < RQtol*fabs(lambda) ) { /* eigenvalue computed to bisection accuracy * => compute eigenvector */ usedBS = true; } else if ( k == MAXITER-1 ) { /* for last iteration use bisection */ needBS = true; } } else { /* go to next iteration */ break; } } /* end k */ /* if necessary call dlar1v to improve error angle by 2nd step */ step2II = false; if ( usedRQ && usedBS && (bstres <= residual) ) { lambda = bstw; step2II = true; } if ( step2II == true ) { dlar1v_(&bl_size, &IONE, &bl_size, &lambda, D, L, DL, DLL, &pivmin, &gaptol, &Z[zind*ldz+bl_begin], &wantNC, &negcount, &ztz, &mingma, &r, &isuppZ[2*zind], &norminv, &residual, &RQcorr, work); } Wshifted[i] = lambda; /* compute support w.r.t. whole matrix * block beginning is offset for each support */ isuppZ[2*zind ] += bl_begin; isuppZ[2*zind + 1] += bl_begin; /* ensure vector is okay if support changed in RQI * minus ones because of indices starting from zero */ i_Zfrom = isuppZ[2*zind ] - 1; i_Zto = isuppZ[2*zind + 1] - 1; i_supmn += bl_begin - 1; i_supmx += bl_begin - 1; if ( i_supmn < i_Zfrom ) { for ( k=i_supmn; k < i_Zfrom; k++ ) { Z[k + zind*ldz] = 0.0; } } if ( i_supmx > i_Zto ) { for ( k=i_Zto+1; k <= i_supmx; k++ ) { Z[k + zind*ldz] = 0.0; } } /* normalize eigenvector */ suppsize = i_Zto - i_Zfrom + 1; mrrr_dscal(&suppsize, &norminv, &Z[i_Zfrom + zind*ldz], &IONE); sigma = L[bl_size-1]; W[i] = lambda + sigma; if (i < end) Wgap[i] = fmax(savedgap, W[i+1]-Werr[i+1] - W[i]-Werr[i]); /* Every x iterations process all pendinf r-tasks */ count++; if (count % EMPTY_RQ_ITER == 0) PMR_process_r_queue(tid, workQ, Wstruct, tolstruct, work, iwork); } /* end i */ /* clean up */ PMR_try_destroy_rrr(RRR); free(sng); return(0); }