/* TODO: Refactor this routine */ static inline int create_subtasks (cluster_t *cl, int tid, proc_t *procinfo, rrr_t *RRR, val_t *Wstruct, vec_t *Zstruct, workQ_t *workQ, counter_t *num_left) { /* From inputs */ int cl_begin = cl->begin; int cl_end = cl->end; int depth = cl->depth; int bl_begin = cl->bl_begin; int bl_end = cl->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = cl->bl_spdiam; double lgap; int pid = procinfo->pid; int nproc = procinfo->nproc; int nthreads = procinfo->nthreads; bool proc_involved=true; double *restrict Wgap = Wstruct->Wgap; double *restrict Wshifted = Wstruct->Wshifted; int *restrict iproc = Wstruct->iproc; int ldz = Zstruct->ldz; double *restrict Z = Zstruct->Z; int *restrict Zindex = Zstruct->Zindex; /* others */ int i, l, k; int max_size; task_t *task; bool task_inserted; int new_first, new_last, new_size, new_ftt1, new_ftt2; int sn_first, sn_last, sn_size; rrr_t *RRR_parent; int new_lpid, new_rpid; double *restrict D_parent; double *restrict L_parent; int my_first, my_last; bool copy_parent_rrr; max_size = fmax(1, PMR_get_counter_value(num_left) / (fmin(depth+1,4)*nthreads) ); task_inserted = true; new_first = cl_begin; for (i=cl_begin; i<=cl_end; i++) { if ( i == cl_end ) new_last = i; else if ( Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i]) ) new_last = i; else continue; new_size = new_last - new_first + 1; if (new_size == 1) { /* singleton was found */ if (new_first==cl_begin || task_inserted==true) { /* initialize new singleton task */ sn_first = new_first; sn_last = new_first; sn_size = 1; } else { /* extend singleton task by one */ sn_last++; sn_size++; } /* insert task if ... */ if (i==cl_end || sn_size>=max_size || Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { /* Check if process involved in s-task */ proc_involved = false; for (k=sn_first; k<=sn_last; k++) { if (iproc[k] == pid) { proc_involved = true; break; } } if (proc_involved == false) { task_inserted = true; new_first = i + 1; continue; } /* Insert task as process is involved */ if (sn_first == cl_begin) { lgap = cl->lgap; } else { lgap = Wgap[sn_first-1]; } PMR_increment_rrr_dependencies(RRR); task = PMR_create_s_task(sn_first, sn_last, depth+1, bl_begin, bl_end, bl_spdiam, lgap, RRR); PMR_insert_task_at_back(workQ->s_queue, task); task_inserted = true; } else { task_inserted = false; } } else { /* cluster was found */ /* check if process involved in processing the new cluster */ new_lpid = nproc-1; new_rpid = -1; for (l=new_first; l<=new_last; l++) { if (iproc[l] != -1) { new_lpid = imin(new_lpid, iproc[l]); new_rpid = imax(new_rpid, iproc[l]); } } if (new_lpid > pid || new_rpid < pid) { task_inserted = true; new_first = i + 1; continue; } /* find gap to the left */ if (new_first == cl_begin) { lgap = cl->lgap; } else { lgap = Wgap[new_first - 1]; } /* determine where to store the parent rrr needed by the * cluster to find its new rrr */ my_first = imax(new_first, cl->proc_W_begin); my_last = imin(new_last, cl->proc_W_end); if ( my_first == my_last ) { /* only one eigenvalue of cluster belongs to process */ copy_parent_rrr = true; } else { /* store parent rrr in Z at column new_ftt */ copy_parent_rrr = false; } new_ftt1 = Zindex[my_first ]; new_ftt2 = Zindex[my_first + 1]; if (copy_parent_rrr == true) { /* Copy parent RRR into alloceted arrays and mark them * for freeing later */ D_parent = (double *) malloc(bl_size * sizeof(double)); assert(D_parent != NULL); L_parent = (double *) malloc(bl_size * sizeof(double)); assert(L_parent != NULL); memcpy(D_parent, RRR->D, bl_size*sizeof(double)); memcpy(L_parent, RRR->L, bl_size*sizeof(double)); RRR_parent = PMR_create_rrr(D_parent, L_parent, NULL, NULL, bl_size, depth); PMR_set_copied_parent_rrr_flag(RRR_parent, true); } else { /* copy parent RRR into Z to make cluster task independent */ memcpy(&Z[new_ftt1*ldz+bl_begin], RRR->D, bl_size*sizeof(double)); memcpy(&Z[new_ftt2*ldz+bl_begin], RRR->L, bl_size*sizeof(double)); RRR_parent = PMR_create_rrr(&Z[new_ftt1*ldz + bl_begin], &Z[new_ftt2*ldz + bl_begin], NULL, NULL, bl_size, depth); } /* Create the task for the cluster and put it in the queue */ task = PMR_create_c_task(new_first, new_last, depth+1, bl_begin, bl_end, bl_spdiam, lgap, cl->proc_W_begin, cl->proc_W_end, new_lpid, new_rpid, RRR_parent); if (new_lpid != new_rpid) PMR_insert_task_at_back(workQ->r_queue, task); else PMR_insert_task_at_back(workQ->c_queue, task); task_inserted = true; } /* if singleton or cluster found */ new_first = i + 1; } /* end i */ /* set flag in RRR that last singleton is created */ PMR_set_parent_processed_flag(RRR); /* clean up */ PMR_try_destroy_rrr(RRR); free(cl); return 0; } /* end create_subtasks */
/* * Initialize work queue by putting all tasks for the process * into the work queue. */ static int init_workQ (proc_t *procinfo, in_t *Dstruct, val_t *Wstruct, int *nzp, workQ_t *workQ) { int pid = procinfo->pid; int nproc = procinfo->nproc; int nthreads = procinfo->nthreads; double *restrict D = Dstruct->D; double *restrict L = Dstruct->E; int nsplit = Dstruct->nsplit; int *restrict isplit = Dstruct->isplit; double *restrict W = Wstruct->W; double *restrict Werr = Wstruct->Werr; double *restrict Wgap = Wstruct->Wgap; int *restrict iproc = Wstruct->iproc; double *restrict Wshifted = Wstruct->Wshifted; double *restrict gersch = Wstruct->gersch; int nz = *nzp; /* Loop over blocks */ int i, j, k, l; int ibegin = 0; for ( j=0; j<nsplit; j++ ) { int iend = isplit[j] - 1; int isize = iend - ibegin + 1; double sigma = L[iend]; /* Use Gerschgorin disks to find spectral diameter */ double gl = gersch[2*ibegin ]; double gu = gersch[2*ibegin + 1]; for (i=ibegin+1; i<iend; i++) { gl = fmin(gl, gersch[2*i ]); gu = fmax(gu, gersch[2*i + 1]); } double spdiam = gu - gl; double avggap = spdiam / (isize-1); /* Find eigenvalues in block */ int nbl = 0; int iWbegin = iend + 1; int iWend = ibegin - 1; for (i=ibegin; i<=iend; i++) { if (nbl == 0 && iproc[i] == pid) { iWbegin = i; iWend = i; nbl++; k = i+1; while (k <=iend && iproc[k] == pid) { iWend++; nbl++; k++; } /* iWend = iWbegin + nbl - 1; instead of incrementing in loop */ } } /* If no eigenvalues for process in block continue */ if (nbl == 0) { ibegin = iend + 1; continue; } /* Compute DL and DLL for later computation of singletons * (freed when all singletons of root are computed) */ double *DL = (double*)malloc(isize*sizeof(double)); assert(DL != NULL); double *DLL = (double*)malloc(isize*sizeof(double)); assert(DLL != NULL); for (i=0; i<isize-1; i++) { double tmp = D[i+ibegin]*L[i+ibegin]; DL[i] = tmp; DLL[i] = tmp*L[i+ibegin]; } rrr_t *RRR = PMR_create_rrr(&D[ibegin], &L[ibegin], DL, DLL, isize, 0); PMR_increment_rrr_dependencies(RRR); /* In W apply shift of current block to eigenvalues * to get unshifted values w.r.t. T */ for (i=ibegin; i<=iend; i++) W[i] += sigma; /* Split eigenvalues of block into singletons and clusters * and add them to process work queue */ int max_size = imax(1, nz/nthreads); bool task_inserted = false; int new_first=ibegin, new_last; int sn_first, sn_last, sn_size; for (i=ibegin; i<=iend; i++) { if (i == iend) new_last = i; else if (Wgap[i] >= MIN_RELGAP*fabs(Wshifted[i])) new_last = i; else continue; /* Skip rest if no eigenvalues of process */ if (new_first > iWend || new_last < iWbegin) { new_first = i + 1; continue; } int new_size = new_last - new_first + 1; if (new_size == 1) { /* Singleton was found */ if (new_first < iWbegin || new_first > iWend) { new_first = i + 1; continue; } else { if (new_first==iWbegin || task_inserted==true) { /* Initialize new singleton task */ sn_first = new_first; sn_last = new_first; sn_size = 1; } else { /* Extend singleton task by one */ sn_last++; sn_size++; } } /* Insert task if ... */ if (i==iWend || sn_size>=max_size || Wgap[i+1] < MIN_RELGAP*fabs(Wshifted[i+1])) { double lgap; if (sn_first == ibegin) { lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); } else { lgap = Wgap[sn_first-1]; } PMR_increment_rrr_dependencies(RRR); task_t *task = PMR_create_s_task (sn_first, sn_last, 1, ibegin, iend, spdiam, lgap, RRR); PMR_insert_task_at_back(workQ->s_queue, task); task_inserted = true; } else { task_inserted = false; } } else { /* Cluster was found */ int cl_first = new_first; int cl_last = new_last; int cl_size = new_size; /* Split cluster into clusters by absolut criterion */ if (cl_size > 3) { /* Split cluster to smaller clusters [cl_first:cl_last] */ for (k=new_first+1; k<new_last; k++) { if (k == new_last-1) cl_last = new_last; else if (k != cl_first && Wgap[k] > 0.8*avggap) cl_last = k; else continue; /* Skip cluster if no eigenvalues of process in it */ if (cl_last < iWbegin || cl_first > iWend) { cl_first = k + 1; continue; } /* Record left gap of cluster */ double lgap; if (cl_first == ibegin) { lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl); } else { lgap = Wgap[cl_first-1]; } /* Determine processes involved in processing the cluster */ int left_pid = nproc-1; int right_pid = 0; for (l=cl_first; l<=cl_last; l++) { if (iproc[l] != -1) { left_pid = imin(left_pid, iproc[l]); right_pid = imax(right_pid, iproc[l]); } } rrr_t *RRR_parent = PMR_create_rrr(&D[ibegin], &L[ibegin], NULL, NULL, isize, 0); task_t *task = PMR_create_c_task (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, iWend, left_pid, right_pid, RRR_parent); /* Insert task into queue, depending if cluster need * communication with other processes */ if (left_pid != right_pid) PMR_insert_task_at_back(workQ->r_queue, task); else PMR_insert_task_at_back(workQ->c_queue, task); cl_first = k + 1; } /* end k */ } else { /* Cluster is too small to split, so insert it to queue */ /* Record left gap of cluster */ double lgap; if (cl_first == ibegin) { lgap = fmax(0.0, W[ibegin] - Werr[ibegin] - gl ); } else { lgap = Wgap[cl_first-1]; } /* Determine processes involved */ int left_pid = nproc-1; int right_pid = 0; for (l=cl_first; l<=cl_last; l++) { if (iproc[l] != -1) { left_pid = imin(left_pid, iproc[l]); right_pid = imax(right_pid, iproc[l]); } } rrr_t *RRR_parent = PMR_create_rrr (&D[ibegin], &L[ibegin], NULL, NULL, isize, 0); task_t *task = PMR_create_c_task (cl_first, cl_last, 1, ibegin, iend, spdiam, lgap, iWbegin, iWend, left_pid, right_pid, RRR_parent); /* Insert task into queue, depending if cluster need * communication with other processes */ if (left_pid != right_pid) PMR_insert_task_at_back(workQ->r_queue, task); else PMR_insert_task_at_back(workQ->c_queue, task); } task_inserted = true; } /* end new_size */ new_first = i + 1; } /* end of splitting eigenvalues into tasks */ /* Set flag in RRR that last singleton is created */ PMR_set_parent_processed_flag(RRR); PMR_try_destroy_rrr(RRR); ibegin = iend + 1; } /* end loop over blocks */ return 0; }
static inline rrr_t* compute_new_rrr (cluster_t *cl, int tid, proc_t *procinfo, val_t *Wstruct, vec_t *Zstruct, tol_t *tolstruct, double *work, int *iwork) { int cl_begin = cl->begin; int cl_end = cl->end; int cl_size = cl_end - cl_begin + 1; int depth = cl->depth; int bl_begin = cl->bl_begin; int bl_end = cl->bl_end; int bl_size = bl_end - bl_begin + 1; double bl_spdiam = cl->bl_spdiam; rrr_t *RRR_parent = cl->RRR; double *restrict Werr = Wstruct->Werr; double *restrict Wgap = Wstruct->Wgap; int *restrict Windex = Wstruct->Windex; double *restrict Wshifted = Wstruct->Wshifted; /* Allocate memory for new representation for cluster */ double *D = (double*)malloc(bl_size*sizeof(double)); double *L = (double*)malloc(bl_size*sizeof(double)); double *DL = (double*)malloc(bl_size*sizeof(double)); double *DLL = (double*)malloc(bl_size*sizeof(double)); assert(D!=NULL); assert(L!=NULL); assert(DL!=NULL); assert(DLL!=NULL); /* Recompute DL and DLL */ int i; double tmp; double *D_parent = RRR_parent->D; double *L_parent = RRR_parent->L; for (i=0; i<bl_size-1; i++) { tmp = D_parent[i]*L_parent[i]; DL[i] = tmp; DLL[i] = tmp*L_parent[i]; } double *DL_parent = DL; double *DLL_parent = DLL; double RQtol = 2*DBL_EPSILON; double pivmin = tolstruct->pivmin; /* to shift as close as possible refine extremal eigenvalues */ int k, p; double savegap; for (k=0; k<2; k++) { if (k == 0) { p = Windex[cl_begin]; savegap = Wgap[cl_begin]; Wgap[cl_begin] = 0.0; } else { p = Windex[cl_end ]; savegap = Wgap[cl_end]; Wgap[cl_end] = 0.0; } int info; int offset = Windex[cl_begin] - 1; odrrb (&bl_size, D_parent, DLL_parent, &p, &p, &RQtol, &RQtol, &offset, &Wshifted[cl_begin], &Wgap[cl_begin], &Werr[cl_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); assert( info == 0 ); if (k == 0) { Wgap[cl_begin] = fmax(0, (Wshifted[cl_begin+1]-Werr[cl_begin+1]) - (Wshifted[cl_begin]+Werr[cl_begin]) ); } else { Wgap[cl_end] = savegap; } } /* end k */ double left_gap = cl->lgap; double right_gap = Wgap[cl_end]; /* Compute new RRR and store it in D and L */ int info; int IONE=1; double tau; odrrf (&bl_size, D_parent, L_parent, DL_parent, &IONE, &cl_size, &Wshifted[cl_begin], &Wgap[cl_begin], &Werr[cl_begin], &bl_spdiam, &left_gap, &right_gap, &pivmin, &tau, D, L, work, &info); assert(info == 0); /* Update shift and store it */ tmp = L_parent[bl_size-1] + tau; L[bl_size-1] = tmp; /* Compute D*L and D*L*L */ for (i=0; i<bl_size-1; i++) { tmp = D[i]*L[i]; DL[i] = tmp; DLL[i] = tmp*L[i]; } /* New RRR of cluster is usually created at the parent level and * initialized to parent RRR, now reset to contain new RRR */ if (RRR_parent->copied_parent_rrr == true) { free(RRR_parent->D); free(RRR_parent->L); } rrr_t *RRR = PMR_reset_rrr(RRR_parent, D, L, DL, DLL, bl_size, depth+1); /* Update shifted eigenvalues */ for (k=cl_begin; k<=cl_end; k++) { double fudge = THREE*DBL_EPSILON*fabs(Wshifted[k]); Wshifted[k] -= tau; fudge += FOUR*DBL_EPSILON*fabs(Wshifted[k]); Werr[k] += fudge; } /* Assure that structure is not freed while it is processed */ PMR_increment_rrr_dependencies(RRR); return RRR; } /* end compute_new_rrr */
static inline void init_workQ(workQ_t *workQ, in_t *Dstruct, val_t *Wstruct) { double *restrict D = Dstruct->D; double *restrict L = Dstruct->E; int *restrict isplit = Dstruct->isplit; int m = Wstruct->m; double *vlp = Wstruct->vlp; double *restrict W = Wstruct->W; double *restrict Werr = Wstruct->Werr; int *restrict iblock = Wstruct->iblock; int j; int begin , end; int Wbegin, Wend; int nbl; double sigma; rrr_t *RRR; double lgap; task_t *task; /* For every unreducible block of the matrix create a task * and put it in the queue */ begin = 0; Wbegin = 0; for (j=0; j<Dstruct->nsplit; j++) { end = isplit[j] - 1; sigma = L[end]; Wend = Wbegin-1; while (Wend < m-1 && iblock[Wend + 1] == j+1) { Wend++; } if (Wend < Wbegin) { begin = end + 1; continue; } nbl = end - begin + 1; RRR = PMR_create_rrr(&D[begin], &L[begin], NULL, NULL, nbl, -1); if (nbl == 1) { /* To make sure that RRR is freed when s-task is processed */ PMR_increment_rrr_dependencies(RRR); PMR_set_parent_processed_flag(RRR); task = PMR_create_s_task(Wbegin, Wbegin, 1, begin, end, Wbegin, Wend, 0, 0, RRR); PMR_insert_task_at_back(workQ->s_queue, task); begin = end + 1; Wbegin = Wend + 1; continue; } lgap = fmax(0.0, (W[Wbegin]+sigma) - Werr[Wbegin] - (*vlp) ); task = PMR_create_c_task(Wbegin, Wend, 0, begin, end, Wbegin, Wend, 0, lgap, RRR); PMR_insert_task_at_back(workQ->c_queue, task); begin = end + 1; Wbegin = Wend + 1; } /* end of loop over block */ }